Merge remote-tracking branch 'remotes/hreitz/tags/pull-block-2021-09-01' into staging
[qemu.git] / target / arm / translate-sve.c
blobbc91a641711cbd72fd6b69f0a581eaa6b1bc8db9
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "translate-a64.h"
34 #include "fpu/softfloat.h"
/*
 * Function-pointer typedefs for the operand shapes used by the
 * out-of-line helpers and expanders invoked from the decode below.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Helpers that return PSTATE.NZCV-style flags in a TCGv_i32. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Helpers for contiguous and scatter/gather memory operations. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
50 * Helpers for extracting complex instruction fields.
53 /* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
56 static int tszimm_esz(DisasContext *s, int x)
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
62 static int tszimm_shr(DisasContext *s, int x)
64 return (16 << tszimm_esz(s, x)) - x;
67 /* See e.g. LSL (immediate, predicated). */
68 static int tszimm_shl(DisasContext *s, int x)
70 return x - (8 << tszimm_esz(s, x));
73 /* The SH bit is in bit 8. Extract the low 8 and shift. */
74 static inline int expand_imm_sh8s(DisasContext *s, int x)
76 return (int8_t)x << (x & 0x100 ? 8 : 0);
79 static inline int expand_imm_sh8u(DisasContext *s, int x)
81 return (uint8_t)x << (x & 0x100 ? 8 : 0);
84 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
85 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
87 static inline int msz_dtype(DisasContext *s, int msz)
89 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
90 return dtype[msz];
/*
 * Include the generated decoder.
 */
#include "decode-sve.c.inc"
100 * Implement all of the translator functions referenced by the decoder.
103 /* Return the offset info CPUARMState of the predicate vector register Pn.
104 * Note for this purpose, FFR is P16.
106 static inline int pred_full_reg_offset(DisasContext *s, int regno)
108 return offsetof(CPUARMState, vfp.pregs[regno]);
111 /* Return the byte size of the whole predicate register, VL / 64. */
112 static inline int pred_full_reg_size(DisasContext *s)
114 return s->sve_len >> 3;
117 /* Round up the size of a register to a size allowed by
118 * the tcg vector infrastructure. Any operation which uses this
119 * size may assume that the bits above pred_full_reg_size are zero,
120 * and must leave them the same way.
122 * Note that this is not needed for the vector registers as they
123 * are always properly sized for tcg vectors.
125 static int size_for_gvec(int size)
127 if (size <= 8) {
128 return 8;
129 } else {
130 return QEMU_ALIGN_UP(size, 16);
134 static int pred_gvec_reg_size(DisasContext *s)
136 return size_for_gvec(pred_full_reg_size(s));
139 /* Invoke an out-of-line helper on 2 Zregs. */
140 static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
141 int rd, int rn, int data)
143 unsigned vsz = vec_full_reg_size(s);
144 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
145 vec_full_reg_offset(s, rn),
146 vsz, vsz, data, fn);
149 /* Invoke an out-of-line helper on 3 Zregs. */
150 static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
151 int rd, int rn, int rm, int data)
153 unsigned vsz = vec_full_reg_size(s);
154 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
155 vec_full_reg_offset(s, rn),
156 vec_full_reg_offset(s, rm),
157 vsz, vsz, data, fn);
160 /* Invoke an out-of-line helper on 4 Zregs. */
161 static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
162 int rd, int rn, int rm, int ra, int data)
164 unsigned vsz = vec_full_reg_size(s);
165 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
166 vec_full_reg_offset(s, rn),
167 vec_full_reg_offset(s, rm),
168 vec_full_reg_offset(s, ra),
169 vsz, vsz, data, fn);
172 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
173 static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
174 int rd, int rn, int pg, int data)
176 unsigned vsz = vec_full_reg_size(s);
177 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
178 vec_full_reg_offset(s, rn),
179 pred_full_reg_offset(s, pg),
180 vsz, vsz, data, fn);
183 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
184 static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
185 int rd, int rn, int rm, int pg, int data)
187 unsigned vsz = vec_full_reg_size(s);
188 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
189 vec_full_reg_offset(s, rn),
190 vec_full_reg_offset(s, rm),
191 pred_full_reg_offset(s, pg),
192 vsz, vsz, data, fn);
195 /* Invoke a vector expander on two Zregs. */
196 static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
197 int esz, int rd, int rn)
199 unsigned vsz = vec_full_reg_size(s);
200 gvec_fn(esz, vec_full_reg_offset(s, rd),
201 vec_full_reg_offset(s, rn), vsz, vsz);
204 /* Invoke a vector expander on three Zregs. */
205 static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
206 int esz, int rd, int rn, int rm)
208 unsigned vsz = vec_full_reg_size(s);
209 gvec_fn(esz, vec_full_reg_offset(s, rd),
210 vec_full_reg_offset(s, rn),
211 vec_full_reg_offset(s, rm), vsz, vsz);
214 /* Invoke a vector expander on four Zregs. */
215 static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
216 int esz, int rd, int rn, int rm, int ra)
218 unsigned vsz = vec_full_reg_size(s);
219 gvec_fn(esz, vec_full_reg_offset(s, rd),
220 vec_full_reg_offset(s, rn),
221 vec_full_reg_offset(s, rm),
222 vec_full_reg_offset(s, ra), vsz, vsz);
225 /* Invoke a vector move on two Zregs. */
226 static bool do_mov_z(DisasContext *s, int rd, int rn)
228 if (sve_access_check(s)) {
229 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
231 return true;
234 /* Initialize a Zreg with replications of a 64-bit immediate. */
235 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
237 unsigned vsz = vec_full_reg_size(s);
238 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
241 /* Invoke a vector expander on three Pregs. */
242 static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
243 int rd, int rn, int rm)
245 unsigned psz = pred_gvec_reg_size(s);
246 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
247 pred_full_reg_offset(s, rn),
248 pred_full_reg_offset(s, rm), psz, psz);
251 /* Invoke a vector move on two Pregs. */
252 static bool do_mov_p(DisasContext *s, int rd, int rn)
254 if (sve_access_check(s)) {
255 unsigned psz = pred_gvec_reg_size(s);
256 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
257 pred_full_reg_offset(s, rn), psz, psz);
259 return true;
262 /* Set the cpu flags as per a return from an SVE helper. */
263 static void do_pred_flags(TCGv_i32 t)
265 tcg_gen_mov_i32(cpu_NF, t);
266 tcg_gen_andi_i32(cpu_ZF, t, 2);
267 tcg_gen_andi_i32(cpu_CF, t, 1);
268 tcg_gen_movi_i32(cpu_VF, 0);
271 /* Subroutines computing the ARM PredTest psuedofunction. */
272 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
274 TCGv_i32 t = tcg_temp_new_i32();
276 gen_helper_sve_predtest1(t, d, g);
277 do_pred_flags(t);
278 tcg_temp_free_i32(t);
281 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
283 TCGv_ptr dptr = tcg_temp_new_ptr();
284 TCGv_ptr gptr = tcg_temp_new_ptr();
285 TCGv_i32 t;
287 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
288 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
289 t = tcg_const_i32(words);
291 gen_helper_sve_predtest(t, dptr, gptr, t);
292 tcg_temp_free_ptr(dptr);
293 tcg_temp_free_ptr(gptr);
295 do_pred_flags(t);
296 tcg_temp_free_i32(t);
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
306 *** SVE Logical - Unpredicated Group
309 static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
311 if (sve_access_check(s)) {
312 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
314 return true;
317 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
319 return do_zzz_fn(s, a, tcg_gen_gvec_and);
322 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
324 return do_zzz_fn(s, a, tcg_gen_gvec_or);
327 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
329 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
332 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
334 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
337 static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
339 TCGv_i64 t = tcg_temp_new_i64();
340 uint64_t mask = dup_const(MO_8, 0xff >> sh);
342 tcg_gen_xor_i64(t, n, m);
343 tcg_gen_shri_i64(d, t, sh);
344 tcg_gen_shli_i64(t, t, 8 - sh);
345 tcg_gen_andi_i64(d, d, mask);
346 tcg_gen_andi_i64(t, t, ~mask);
347 tcg_gen_or_i64(d, d, t);
348 tcg_temp_free_i64(t);
351 static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
353 TCGv_i64 t = tcg_temp_new_i64();
354 uint64_t mask = dup_const(MO_16, 0xffff >> sh);
356 tcg_gen_xor_i64(t, n, m);
357 tcg_gen_shri_i64(d, t, sh);
358 tcg_gen_shli_i64(t, t, 16 - sh);
359 tcg_gen_andi_i64(d, d, mask);
360 tcg_gen_andi_i64(t, t, ~mask);
361 tcg_gen_or_i64(d, d, t);
362 tcg_temp_free_i64(t);
365 static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
367 tcg_gen_xor_i32(d, n, m);
368 tcg_gen_rotri_i32(d, d, sh);
371 static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
373 tcg_gen_xor_i64(d, n, m);
374 tcg_gen_rotri_i64(d, d, sh);
377 static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
378 TCGv_vec m, int64_t sh)
380 tcg_gen_xor_vec(vece, d, n, m);
381 tcg_gen_rotri_vec(vece, d, d, sh);
384 void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
385 uint32_t rm_ofs, int64_t shift,
386 uint32_t opr_sz, uint32_t max_sz)
388 static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
389 static const GVecGen3i ops[4] = {
390 { .fni8 = gen_xar8_i64,
391 .fniv = gen_xar_vec,
392 .fno = gen_helper_sve2_xar_b,
393 .opt_opc = vecop,
394 .vece = MO_8 },
395 { .fni8 = gen_xar16_i64,
396 .fniv = gen_xar_vec,
397 .fno = gen_helper_sve2_xar_h,
398 .opt_opc = vecop,
399 .vece = MO_16 },
400 { .fni4 = gen_xar_i32,
401 .fniv = gen_xar_vec,
402 .fno = gen_helper_sve2_xar_s,
403 .opt_opc = vecop,
404 .vece = MO_32 },
405 { .fni8 = gen_xar_i64,
406 .fniv = gen_xar_vec,
407 .fno = gen_helper_gvec_xar_d,
408 .opt_opc = vecop,
409 .vece = MO_64 }
411 int esize = 8 << vece;
413 /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
414 tcg_debug_assert(shift >= 0);
415 tcg_debug_assert(shift <= esize);
416 shift &= esize - 1;
418 if (shift == 0) {
419 /* xar with no rotate devolves to xor. */
420 tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
421 } else {
422 tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
423 shift, &ops[vece]);
427 static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
429 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
430 return false;
432 if (sve_access_check(s)) {
433 unsigned vsz = vec_full_reg_size(s);
434 gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
435 vec_full_reg_offset(s, a->rn),
436 vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
438 return true;
441 static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
443 if (!dc_isar_feature(aa64_sve2, s)) {
444 return false;
446 if (sve_access_check(s)) {
447 gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
449 return true;
452 static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
454 tcg_gen_xor_i64(d, n, m);
455 tcg_gen_xor_i64(d, d, k);
458 static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
459 TCGv_vec m, TCGv_vec k)
461 tcg_gen_xor_vec(vece, d, n, m);
462 tcg_gen_xor_vec(vece, d, d, k);
465 static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
466 uint32_t a, uint32_t oprsz, uint32_t maxsz)
468 static const GVecGen4 op = {
469 .fni8 = gen_eor3_i64,
470 .fniv = gen_eor3_vec,
471 .fno = gen_helper_sve2_eor3,
472 .vece = MO_64,
473 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
475 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
478 static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
480 return do_sve2_zzzz_fn(s, a, gen_eor3);
483 static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
485 tcg_gen_andc_i64(d, m, k);
486 tcg_gen_xor_i64(d, d, n);
489 static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
490 TCGv_vec m, TCGv_vec k)
492 tcg_gen_andc_vec(vece, d, m, k);
493 tcg_gen_xor_vec(vece, d, d, n);
496 static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
497 uint32_t a, uint32_t oprsz, uint32_t maxsz)
499 static const GVecGen4 op = {
500 .fni8 = gen_bcax_i64,
501 .fniv = gen_bcax_vec,
502 .fno = gen_helper_sve2_bcax,
503 .vece = MO_64,
504 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
506 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
509 static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
511 return do_sve2_zzzz_fn(s, a, gen_bcax);
514 static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
515 uint32_t a, uint32_t oprsz, uint32_t maxsz)
517 /* BSL differs from the generic bitsel in argument ordering. */
518 tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
521 static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
523 return do_sve2_zzzz_fn(s, a, gen_bsl);
526 static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
528 tcg_gen_andc_i64(n, k, n);
529 tcg_gen_andc_i64(m, m, k);
530 tcg_gen_or_i64(d, n, m);
533 static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
534 TCGv_vec m, TCGv_vec k)
536 if (TCG_TARGET_HAS_bitsel_vec) {
537 tcg_gen_not_vec(vece, n, n);
538 tcg_gen_bitsel_vec(vece, d, k, n, m);
539 } else {
540 tcg_gen_andc_vec(vece, n, k, n);
541 tcg_gen_andc_vec(vece, m, m, k);
542 tcg_gen_or_vec(vece, d, n, m);
546 static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
547 uint32_t a, uint32_t oprsz, uint32_t maxsz)
549 static const GVecGen4 op = {
550 .fni8 = gen_bsl1n_i64,
551 .fniv = gen_bsl1n_vec,
552 .fno = gen_helper_sve2_bsl1n,
553 .vece = MO_64,
554 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
556 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
559 static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
561 return do_sve2_zzzz_fn(s, a, gen_bsl1n);
564 static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
567 * Z[dn] = (n & k) | (~m & ~k)
568 * = | ~(m | k)
570 tcg_gen_and_i64(n, n, k);
571 if (TCG_TARGET_HAS_orc_i64) {
572 tcg_gen_or_i64(m, m, k);
573 tcg_gen_orc_i64(d, n, m);
574 } else {
575 tcg_gen_nor_i64(m, m, k);
576 tcg_gen_or_i64(d, n, m);
580 static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
581 TCGv_vec m, TCGv_vec k)
583 if (TCG_TARGET_HAS_bitsel_vec) {
584 tcg_gen_not_vec(vece, m, m);
585 tcg_gen_bitsel_vec(vece, d, k, n, m);
586 } else {
587 tcg_gen_and_vec(vece, n, n, k);
588 tcg_gen_or_vec(vece, m, m, k);
589 tcg_gen_orc_vec(vece, d, n, m);
593 static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
594 uint32_t a, uint32_t oprsz, uint32_t maxsz)
596 static const GVecGen4 op = {
597 .fni8 = gen_bsl2n_i64,
598 .fniv = gen_bsl2n_vec,
599 .fno = gen_helper_sve2_bsl2n,
600 .vece = MO_64,
601 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
603 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
606 static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
608 return do_sve2_zzzz_fn(s, a, gen_bsl2n);
611 static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
613 tcg_gen_and_i64(n, n, k);
614 tcg_gen_andc_i64(m, m, k);
615 tcg_gen_nor_i64(d, n, m);
618 static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
619 TCGv_vec m, TCGv_vec k)
621 tcg_gen_bitsel_vec(vece, d, k, n, m);
622 tcg_gen_not_vec(vece, d, d);
625 static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
626 uint32_t a, uint32_t oprsz, uint32_t maxsz)
628 static const GVecGen4 op = {
629 .fni8 = gen_nbsl_i64,
630 .fniv = gen_nbsl_vec,
631 .fno = gen_helper_sve2_nbsl,
632 .vece = MO_64,
633 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
635 tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
638 static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
640 return do_sve2_zzzz_fn(s, a, gen_nbsl);
644 *** SVE Integer Arithmetic - Unpredicated Group
647 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
649 return do_zzz_fn(s, a, tcg_gen_gvec_add);
652 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
654 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
657 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
659 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
662 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
664 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
667 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
669 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
672 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
674 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
678 *** SVE Integer Arithmetic - Binary Predicated Group
681 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
683 if (fn == NULL) {
684 return false;
686 if (sve_access_check(s)) {
687 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
689 return true;
692 /* Select active elememnts from Zn and inactive elements from Zm,
693 * storing the result in Zd.
695 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
697 static gen_helper_gvec_4 * const fns[4] = {
698 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
699 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
701 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
704 #define DO_ZPZZ(NAME, name) \
705 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
707 static gen_helper_gvec_4 * const fns[4] = { \
708 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
709 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
710 }; \
711 return do_zpzz_ool(s, a, fns[a->esz]); \
714 DO_ZPZZ(AND, and)
715 DO_ZPZZ(EOR, eor)
716 DO_ZPZZ(ORR, orr)
717 DO_ZPZZ(BIC, bic)
719 DO_ZPZZ(ADD, add)
720 DO_ZPZZ(SUB, sub)
722 DO_ZPZZ(SMAX, smax)
723 DO_ZPZZ(UMAX, umax)
724 DO_ZPZZ(SMIN, smin)
725 DO_ZPZZ(UMIN, umin)
726 DO_ZPZZ(SABD, sabd)
727 DO_ZPZZ(UABD, uabd)
729 DO_ZPZZ(MUL, mul)
730 DO_ZPZZ(SMULH, smulh)
731 DO_ZPZZ(UMULH, umulh)
733 DO_ZPZZ(ASR, asr)
734 DO_ZPZZ(LSR, lsr)
735 DO_ZPZZ(LSL, lsl)
737 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
739 static gen_helper_gvec_4 * const fns[4] = {
740 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
742 return do_zpzz_ool(s, a, fns[a->esz]);
745 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
747 static gen_helper_gvec_4 * const fns[4] = {
748 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
750 return do_zpzz_ool(s, a, fns[a->esz]);
753 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
755 if (sve_access_check(s)) {
756 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
758 return true;
761 #undef DO_ZPZZ
764 *** SVE Integer Arithmetic - Unary Predicated Group
767 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
769 if (fn == NULL) {
770 return false;
772 if (sve_access_check(s)) {
773 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
775 return true;
778 #define DO_ZPZ(NAME, name) \
779 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
781 static gen_helper_gvec_3 * const fns[4] = { \
782 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
783 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
784 }; \
785 return do_zpz_ool(s, a, fns[a->esz]); \
788 DO_ZPZ(CLS, cls)
789 DO_ZPZ(CLZ, clz)
790 DO_ZPZ(CNT_zpz, cnt_zpz)
791 DO_ZPZ(CNOT, cnot)
792 DO_ZPZ(NOT_zpz, not_zpz)
793 DO_ZPZ(ABS, abs)
794 DO_ZPZ(NEG, neg)
796 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
798 static gen_helper_gvec_3 * const fns[4] = {
799 NULL,
800 gen_helper_sve_fabs_h,
801 gen_helper_sve_fabs_s,
802 gen_helper_sve_fabs_d
804 return do_zpz_ool(s, a, fns[a->esz]);
807 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
809 static gen_helper_gvec_3 * const fns[4] = {
810 NULL,
811 gen_helper_sve_fneg_h,
812 gen_helper_sve_fneg_s,
813 gen_helper_sve_fneg_d
815 return do_zpz_ool(s, a, fns[a->esz]);
818 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
820 static gen_helper_gvec_3 * const fns[4] = {
821 NULL,
822 gen_helper_sve_sxtb_h,
823 gen_helper_sve_sxtb_s,
824 gen_helper_sve_sxtb_d
826 return do_zpz_ool(s, a, fns[a->esz]);
829 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
831 static gen_helper_gvec_3 * const fns[4] = {
832 NULL,
833 gen_helper_sve_uxtb_h,
834 gen_helper_sve_uxtb_s,
835 gen_helper_sve_uxtb_d
837 return do_zpz_ool(s, a, fns[a->esz]);
840 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
842 static gen_helper_gvec_3 * const fns[4] = {
843 NULL, NULL,
844 gen_helper_sve_sxth_s,
845 gen_helper_sve_sxth_d
847 return do_zpz_ool(s, a, fns[a->esz]);
850 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
852 static gen_helper_gvec_3 * const fns[4] = {
853 NULL, NULL,
854 gen_helper_sve_uxth_s,
855 gen_helper_sve_uxth_d
857 return do_zpz_ool(s, a, fns[a->esz]);
860 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
862 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
865 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
867 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
870 #undef DO_ZPZ
873 *** SVE Integer Reduction Group
876 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
877 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
878 gen_helper_gvec_reduc *fn)
880 unsigned vsz = vec_full_reg_size(s);
881 TCGv_ptr t_zn, t_pg;
882 TCGv_i32 desc;
883 TCGv_i64 temp;
885 if (fn == NULL) {
886 return false;
888 if (!sve_access_check(s)) {
889 return true;
892 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
893 temp = tcg_temp_new_i64();
894 t_zn = tcg_temp_new_ptr();
895 t_pg = tcg_temp_new_ptr();
897 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
898 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
899 fn(temp, t_zn, t_pg, desc);
900 tcg_temp_free_ptr(t_zn);
901 tcg_temp_free_ptr(t_pg);
902 tcg_temp_free_i32(desc);
904 write_fp_dreg(s, a->rd, temp);
905 tcg_temp_free_i64(temp);
906 return true;
909 #define DO_VPZ(NAME, name) \
910 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
912 static gen_helper_gvec_reduc * const fns[4] = { \
913 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
914 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
915 }; \
916 return do_vpz_ool(s, a, fns[a->esz]); \
919 DO_VPZ(ORV, orv)
920 DO_VPZ(ANDV, andv)
921 DO_VPZ(EORV, eorv)
923 DO_VPZ(UADDV, uaddv)
924 DO_VPZ(SMAXV, smaxv)
925 DO_VPZ(UMAXV, umaxv)
926 DO_VPZ(SMINV, sminv)
927 DO_VPZ(UMINV, uminv)
929 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
931 static gen_helper_gvec_reduc * const fns[4] = {
932 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
933 gen_helper_sve_saddv_s, NULL
935 return do_vpz_ool(s, a, fns[a->esz]);
938 #undef DO_VPZ
941 *** SVE Shift by Immediate - Predicated Group
945 * Copy Zn into Zd, storing zeros into inactive elements.
946 * If invert, store zeros into the active elements.
948 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
949 int esz, bool invert)
951 static gen_helper_gvec_3 * const fns[4] = {
952 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
953 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
956 if (sve_access_check(s)) {
957 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
959 return true;
962 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
963 gen_helper_gvec_3 *fn)
965 if (sve_access_check(s)) {
966 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
968 return true;
971 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
973 static gen_helper_gvec_3 * const fns[4] = {
974 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
975 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
977 if (a->esz < 0) {
978 /* Invalid tsz encoding -- see tszimm_esz. */
979 return false;
981 /* Shift by element size is architecturally valid. For
982 arithmetic right-shift, it's the same as by one less. */
983 a->imm = MIN(a->imm, (8 << a->esz) - 1);
984 return do_zpzi_ool(s, a, fns[a->esz]);
987 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
989 static gen_helper_gvec_3 * const fns[4] = {
990 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
991 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
993 if (a->esz < 0) {
994 return false;
996 /* Shift by element size is architecturally valid.
997 For logical shifts, it is a zeroing operation. */
998 if (a->imm >= (8 << a->esz)) {
999 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1000 } else {
1001 return do_zpzi_ool(s, a, fns[a->esz]);
1005 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
1007 static gen_helper_gvec_3 * const fns[4] = {
1008 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
1009 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
1011 if (a->esz < 0) {
1012 return false;
1014 /* Shift by element size is architecturally valid.
1015 For logical shifts, it is a zeroing operation. */
1016 if (a->imm >= (8 << a->esz)) {
1017 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1018 } else {
1019 return do_zpzi_ool(s, a, fns[a->esz]);
1023 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
1025 static gen_helper_gvec_3 * const fns[4] = {
1026 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
1027 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
1029 if (a->esz < 0) {
1030 return false;
1032 /* Shift by element size is architecturally valid. For arithmetic
1033 right shift for division, it is a zeroing operation. */
1034 if (a->imm >= (8 << a->esz)) {
1035 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1036 } else {
1037 return do_zpzi_ool(s, a, fns[a->esz]);
1041 static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1043 static gen_helper_gvec_3 * const fns[4] = {
1044 gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1045 gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1047 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1048 return false;
1050 return do_zpzi_ool(s, a, fns[a->esz]);
1053 static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1055 static gen_helper_gvec_3 * const fns[4] = {
1056 gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1057 gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1059 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1060 return false;
1062 return do_zpzi_ool(s, a, fns[a->esz]);
1065 static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1067 static gen_helper_gvec_3 * const fns[4] = {
1068 gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1069 gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1071 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1072 return false;
1074 return do_zpzi_ool(s, a, fns[a->esz]);
1077 static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1079 static gen_helper_gvec_3 * const fns[4] = {
1080 gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1081 gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1083 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1084 return false;
1086 return do_zpzi_ool(s, a, fns[a->esz]);
1089 static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1091 static gen_helper_gvec_3 * const fns[4] = {
1092 gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1093 gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1095 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1096 return false;
1098 return do_zpzi_ool(s, a, fns[a->esz]);
1102 *** SVE Bitwise Shift - Predicated Group
1105 #define DO_ZPZW(NAME, name) \
1106 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
1108 static gen_helper_gvec_4 * const fns[3] = { \
1109 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
1110 gen_helper_sve_##name##_zpzw_s, \
1111 }; \
1112 if (a->esz < 0 || a->esz >= 3) { \
1113 return false; \
1115 return do_zpzz_ool(s, a, fns[a->esz]); \
1118 DO_ZPZW(ASR, asr)
1119 DO_ZPZW(LSR, lsr)
1120 DO_ZPZW(LSL, lsl)
1122 #undef DO_ZPZW
1125 *** SVE Bitwise Shift - Unpredicated Group
1128 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1129 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1130 int64_t, uint32_t, uint32_t))
1132 if (a->esz < 0) {
1133 /* Invalid tsz encoding -- see tszimm_esz. */
1134 return false;
1136 if (sve_access_check(s)) {
1137 unsigned vsz = vec_full_reg_size(s);
1138 /* Shift by element size is architecturally valid. For
1139 arithmetic right-shift, it's the same as by one less.
1140 Otherwise it is a zeroing operation. */
1141 if (a->imm >= 8 << a->esz) {
1142 if (asr) {
1143 a->imm = (8 << a->esz) - 1;
1144 } else {
1145 do_dupi_z(s, a->rd, 0);
1146 return true;
1149 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1150 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1152 return true;
1155 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
1157 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
1160 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
1162 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
1165 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
1167 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
1170 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1172 if (fn == NULL) {
1173 return false;
1175 if (sve_access_check(s)) {
1176 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
1178 return true;
1181 #define DO_ZZW(NAME, name) \
1182 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
1184 static gen_helper_gvec_3 * const fns[4] = { \
1185 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
1186 gen_helper_sve_##name##_zzw_s, NULL \
1187 }; \
1188 return do_zzw_ool(s, a, fns[a->esz]); \
1191 DO_ZZW(ASR, asr)
1192 DO_ZZW(LSR, lsr)
1193 DO_ZZW(LSL, lsl)
1195 #undef DO_ZZW
1198 *** SVE Integer Multiply-Add Group
1201 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1202 gen_helper_gvec_5 *fn)
1204 if (sve_access_check(s)) {
1205 unsigned vsz = vec_full_reg_size(s);
1206 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1207 vec_full_reg_offset(s, a->ra),
1208 vec_full_reg_offset(s, a->rn),
1209 vec_full_reg_offset(s, a->rm),
1210 pred_full_reg_offset(s, a->pg),
1211 vsz, vsz, 0, fn);
1213 return true;
/* Instantiate MLA/MLS for all four element sizes. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
/*
 *** SVE Index Generation Group
 */

/*
 * Expand INDEX: element[i] of Zd = start + i * incr.
 * For element sizes below MO_64 only the low 32 bits of start/incr
 * are significant, so they are narrowed before calling the helper.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
1266 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
1268 if (sve_access_check(s)) {
1269 TCGv_i64 start = tcg_const_i64(a->imm1);
1270 TCGv_i64 incr = tcg_const_i64(a->imm2);
1271 do_index(s, a->esz, a->rd, start, incr);
1272 tcg_temp_free_i64(start);
1273 tcg_temp_free_i64(incr);
1275 return true;
1278 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
1280 if (sve_access_check(s)) {
1281 TCGv_i64 start = tcg_const_i64(a->imm);
1282 TCGv_i64 incr = cpu_reg(s, a->rm);
1283 do_index(s, a->esz, a->rd, start, incr);
1284 tcg_temp_free_i64(start);
1286 return true;
1289 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
1291 if (sve_access_check(s)) {
1292 TCGv_i64 start = cpu_reg(s, a->rn);
1293 TCGv_i64 incr = tcg_const_i64(a->imm);
1294 do_index(s, a->esz, a->rd, start, incr);
1295 tcg_temp_free_i64(incr);
1297 return true;
1300 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
1302 if (sve_access_check(s)) {
1303 TCGv_i64 start = cpu_reg(s, a->rn);
1304 TCGv_i64 incr = cpu_reg(s, a->rm);
1305 do_index(s, a->esz, a->rd, start, incr);
1307 return true;
1311 *** SVE Stack Allocation Group
1314 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1316 if (sve_access_check(s)) {
1317 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1318 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1319 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1321 return true;
1324 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1326 if (sve_access_check(s)) {
1327 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1328 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1329 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1331 return true;
1334 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1336 if (sve_access_check(s)) {
1337 TCGv_i64 reg = cpu_reg(s, a->rd);
1338 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1340 return true;
/*
 *** SVE Compute Vector Address Group
 */

/* Expand ADR: Zd = Zn + (Zm <<| extended) << imm, via helper. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
    }
    return true;
}

/* ADR, packed 32-bit offsets. */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR, packed 64-bit offsets. */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR, sign-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR, zero-extended 32-bit offsets in 64-bit elements. */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1376 *** SVE Integer Misc - Unpredicated Group
1379 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1381 static gen_helper_gvec_2 * const fns[4] = {
1382 NULL,
1383 gen_helper_sve_fexpa_h,
1384 gen_helper_sve_fexpa_s,
1385 gen_helper_sve_fexpa_d,
1387 if (a->esz == 0) {
1388 return false;
1390 if (sve_access_check(s)) {
1391 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1393 return true;
1396 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1398 static gen_helper_gvec_3 * const fns[4] = {
1399 NULL,
1400 gen_helper_sve_ftssel_h,
1401 gen_helper_sve_ftssel_s,
1402 gen_helper_sve_ftssel_d,
1404 if (a->esz == 0) {
1405 return false;
1407 if (sve_access_check(s)) {
1408 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1410 return true;
/*
 *** SVE Predicate Logical Operations Group
 */

/*
 * Expand a predicate logical operation, optionally setting NZCV (a->s).
 * Without flags, a single gvec expansion suffices.  With flags, either
 * do everything inline in i64 temps (when the predicate fits in 64 bits)
 * or expand out-of-line and then run PTEST over the result.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            /* Stash the guard in the scratch predicate slot. */
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
/* Pd = Pn & Pm & Pg, one 64-bit chunk. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector version of the above. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/*
 * AND (predicates).  Without flags, special-case the register aliases:
 * Pn == Pm == Pg reduces to a move, and other overlaps with Pg reduce
 * to a plain two-operand AND.
 */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
/* Pd = (Pn & ~Pm) & Pg, one 64-bit chunk. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector version of the above. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/*
 * BIC (predicates).  Without flags, Pg == Pn reduces to a plain
 * two-operand ANDC since (Pn & ~Pm) & Pn == Pn & ~Pm.
 */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
/* Pd = (Pn ^ Pm) & Pg, one 64-bit chunk. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector version of the above. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* EOR (predicates). */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1569 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1571 if (a->s) {
1572 return false;
1574 if (sve_access_check(s)) {
1575 unsigned psz = pred_gvec_reg_size(s);
1576 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1577 pred_full_reg_offset(s, a->pg),
1578 pred_full_reg_offset(s, a->rn),
1579 pred_full_reg_offset(s, a->rm), psz, psz);
1581 return true;
/* Pd = (Pn | Pm) & Pg, one 64-bit chunk. */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector version of the above. */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/*
 * ORR (predicates).  Without flags, Pg == Pn == Pm is the canonical
 * MOV (predicate) alias.
 */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
/* Pd = (Pn | ~Pm) & Pg, one 64-bit chunk. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Vector version of the above. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORN (predicates). */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/* Pd = ~(Pn | Pm) & Pg == Pg & ~(Pn | Pm), one 64-bit chunk. */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector version of the above. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NOR (predicates). */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/* Pd = ~(Pn & Pm) & Pg == Pg & ~(Pn & Pm), one 64-bit chunk. */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Vector version of the above. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NAND (predicates). */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/*
 *** SVE Predicate Misc Group
 */

/*
 * PTEST: set NZCV from Pn tested against governing predicate Pg.
 * A single-word predicate is handled inline; larger sizes go through
 * the generic do_predtest expansion.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements for the given pattern, or 0
 * when the vector cannot satisfy the pattern (or for #uimm5 patterns).
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed-count patterns yield 0 when the vector is too short. */
    return elements >= bound ? bound : 0;
}
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Trim the final partial word to the active bits. */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is a single word. */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Uniform contents; try a single gvec dup of the whole thing. */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to word-at-a-time stores: full words, then the partial
     * final word, then zeros for the remainder of the register.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
/* PTRUE / PTRUES. */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): Pd = FFR. */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: FFR = Pn. */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
/*
 * Shared expansion for PFIRST/PNEXT: the helper updates Pd in place
 * and returns the new NZCV in the same i32 that carried the descriptor.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* t is input descriptor and output flags. */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp to the bound when the 64-bit result went past it. */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract saturates to 0 when reg < val. */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add saturates to ~0 when the sum wrapped. */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
/* Similarly with a vector and a scalar operand.
 * The helpers implement saturating-add only, so subtraction is done
 * by negating the (known non-negative) scalar, except for the one
 * unsigned 64-bit case which has a dedicated uqsubi helper.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
2074 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
2076 if (sve_access_check(s)) {
2077 unsigned fullsz = vec_full_reg_size(s);
2078 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2079 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2081 return true;
2084 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
2086 if (sve_access_check(s)) {
2087 unsigned fullsz = vec_full_reg_size(s);
2088 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2089 int inc = numelem * a->imm * (a->d ? -1 : 1);
2090 TCGv_i64 reg = cpu_reg(s, a->rd);
2092 tcg_gen_addi_i64(reg, reg, inc);
2094 return true;
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit result in 64-bit Xd). */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        /* Even a zero increment (re-)extends the register. */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 64-bit). */
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Zero increment is a no-op for the 64-bit form. */
    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
/* INC/DEC (vector); MO_8 is unallocated. */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero increment degenerates to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/* SQINC/UQINC/SQDEC/UQDEC (vector); MO_8 is unallocated. */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero increment degenerates to a register move. */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */

/*
 * Expand a bitwise-logical immediate: decode the shared A64 bitmask
 * encoding (N:immr:imms packed in a->dbm); an undecodable mask is an
 * unallocated encoding.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}
/* AND/ORR/EOR (vector, bitmask immediate). */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
/* DUPM: broadcast a decoded bitmask immediate into Zd. */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
/* FCPY: merging copy of a VFP-encoded immediate; MO_8 is unallocated. */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/* CPY (immediate, merging); sh bit (insn bit 13) invalid for bytes. */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/* CPY (immediate, zeroing); sh bit (insn bit 13) invalid for bytes. */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */

/*
 * EXT: concatenate Zn:Zm and extract vsz bytes starting at imm.
 * An out-of-range imm selects Zn unchanged (n_ofs clamps to 0).
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}

/* SVE2 constructive EXT uses the pair Zn:Z(n+1) as the source. */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2367 *** SVE Permute - Unpredicated Group
2370 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2372 if (sve_access_check(s)) {
2373 unsigned vsz = vec_full_reg_size(s);
2374 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2375 vsz, vsz, cpu_reg_sp(s, a->rn));
2377 return true;
/*
 * DUP (indexed): element size and index are packed in a->imm; an
 * all-zero low 5 bits is an unallocated encoding.  An out-of-range
 * index writes zero.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
/* Expand INSR: shift Zn up one element and insert val at element 0. */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}
/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm. */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert Xm. */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
/* REV (vector): reverse the order of elements within Zn. */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}

/* TBL: table lookup, indices from Zm. */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
/* SVE2 TBL: two-register table, sourced from the pair Zn:Z(n+1). */
static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
                          (a->rn + 1) % 32, a->rm, 0);
    }
    return true;
}

/* SVE2 TBX: like TBL, but out-of-range indices leave Zd unchanged. */
static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
/* SUNPKLO/SUNPKHI/UUNPKLO/UUNPKHI: widen the low (a->h == 0) or high
 * (a->h == 1) half of Zn to double-width elements, sign- (a->u == 0)
 * or zero-extended (a->u == 1). */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    /* Indexed by [esz][u]; esz here is the *destination* size, so
     * byte destinations do not exist. */
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The source half is selected by offsetting into Zn. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
/*
 *** SVE Permute - Predicates Group
 */

/* Common expansion for 3-operand predicate permutes (ZIP/UZP/TRN).
 * HIGH_ODD selects the high-half/odd-element variant and is passed to
 * the helper in the descriptor DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    /* Predicate sizes cannot use simd_desc; build a PREDDESC by hand. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
/* Common expansion for 2-operand predicate permutes (REV/PUNPK).
 * HIGH_ODD is passed through to the helper in the descriptor DATA field.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes cannot use simd_desc; build a PREDDESC by hand. */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
/* ZIP1/ZIP2: interleave predicate elements from the low/high halves. */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1/UZP2: concatenate the even/odd predicate elements. */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1/TRN2: transpose even/odd predicate element pairs. */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse the order of the predicate elements. */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO/PUNPKHI: unpack the low/high half of Pn to double-width
 * predicate elements. */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
/*
 *** SVE Permute - Interleaving Group
 */

/* ZIP1/ZIP2 (vector): interleave elements; HIGH selects which half of
 * the sources is used, implemented as a byte offset into Zn/Zm. */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/* Expand a 3-operand out-of-line helper with an immediate DATA field. */
static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
/* ZIP1/ZIP2 (F64MM, 128-bit elements): as do_zip but interleaving
 * quadwords.  The high offset rounds the vector length down to a
 * whole number of quadword pairs. */
static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, gen_helper_sve2_zip_q);
    }
    return true;
}

static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}

static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
/* UZP helpers, one per element size. */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

/* UZP1/UZP2: the DATA field is the starting byte offset — 0 for the
 * even elements, one element (1 << esz) for the odd elements. */
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

/* F64MM quadword forms: offset is 16 bytes for the odd quadwords. */
static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
}

static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
}

/* TRN helpers, one per element size. */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

/* TRN1/TRN2: DATA is the odd-element byte offset, as for UZP. */
static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
}

static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
}
/*
 *** SVE Permute Vector - Predicated Group
 */

/* COMPACT: pack active elements to the low end of Zd.  Only word and
 * doubleword element sizes exist; the NULL slots reject b/h. */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two vector length: wrap with a simple mask. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 once last runs off the end. */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* -1 & (vsz - 1) is exactly the last element's offset. */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select the last element's offset when last is negative. */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2841 /* Load an unsigned element of ESZ from BASE+OFS. */
2842 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2844 TCGv_i64 r = tcg_temp_new_i64();
2846 switch (esz) {
2847 case 0:
2848 tcg_gen_ld8u_i64(r, base, ofs);
2849 break;
2850 case 1:
2851 tcg_gen_ld16u_i64(r, base, ofs);
2852 break;
2853 case 2:
2854 tcg_gen_ld32u_i64(r, base, ofs);
2855 break;
2856 case 3:
2857 tcg_gen_ld_i64(r, base, ofs);
2858 break;
2859 default:
2860 g_assert_not_reached();
2862 return r;
/* Load an unsigned element of ESZ from RM[LAST].  LAST is a byte offset
 * into the vector register, produced by find_last_active. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
/* Compute CLAST for a Zreg: broadcast the element after (CLASTA) or at
 * (CLASTB) the last active element of Zm to all of Zd; if no element is
 * active, Zd gets Zn (the MOVPRFX source) unchanged. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Local temp: its value must survive the brcond below. */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar: place the selected element of Zm in
 * REG_VAL, or leave REG_VAL unchanged if no element is active. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
/* Compute CLAST for a Vreg: the current Vd element is the fallback
 * value when no element is active. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        /* write_fp_dreg zeroes the high bits of the destination. */
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg: the (zero-extended) current Xd value is the
 * fallback when no element is active. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the existing register value to the element size first,
     * since the final result is only esz bits wide, zero-extended. */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
/* Compute LAST for a scalar: return the element at (LASTB) or after
 * (LASTA) the last active element.  Unlike CLAST there is no fallback:
 * a not-found index is wrapped to a valid element offset instead. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
/* CPY (scalar, merging): copy Xn to the active elements of Zd. */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar, merging): copy element 0 of Vn to the active
 * elements of Zd. */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
/* REVB: reverse bytes within each element (h/s/d only). */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVH: reverse halfwords within each element (s/d only). */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3149 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
3151 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
/* RBIT: reverse the bits within each element. */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
/* SPLICE: concatenate the active elements of Zn with elements of Zm.
 * The element size rides in the helper's DATA field. */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

/* SPLICE (SVE2, constructive): the second source is the register pair
 * partner Zn+1 (modulo 32). */
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
/*
 *** SVE Integer Compare - Vectors Group
 */

/* Expand a predicated vector-vector compare that also sets NZCV.
 * The helper receives the descriptor in T and returns the flags result
 * in the same register, which is then expanded into NZCV.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
/* Generate the trans function for a same-width vector-vector compare,
 * dispatching to the per-element-size helpers. */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* As DO_PPZZ, but the second operand holds 64-bit "wide elements";
 * there is no doubleword form, hence the NULL slot. */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */

/* Expand a predicated vector-immediate compare that also sets NZCV.
 * The immediate is carried in the simd_desc DATA field.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
/* Generate the trans function for a vector-immediate compare. */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
/*
 *** SVE Partition Break Group
 */

/* Expand a 3-input partition break (BRKPA/BRKPB).  When a->s is set,
 * the flag-setting helper FN_S runs instead and NZCV is updated.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: t doubles as desc input and flags output. */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
/* Expand a 2-input partition break (BRKA/BRKB/BRKN); see do_brk3. */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* Flag-setting form: t doubles as desc input and flags output. */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
/* BRKPA/BRKPB: propagating break, pairwise forms. */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA/BRKB: break after/before, merging (_m) and zeroing (_z). */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: propagate-next break. */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns)
}
/*
 *** SVE Predicate Count Group
 */

/* Count the active elements of PN governed by PG into VAL. */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: mask and popcount inline. */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates go out of line. */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
/* CNTP: write the active-element count of Pn (governed by Pg) to Xd. */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

/* INCP/DECP (scalar): add or subtract the count to/from Xd. */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

/* INCP/DECP (vector): add or subtract the count to each element of Zd.
 * esz == 0 is rejected; presumably an unallocated encoding — confirm
 * against the decode tables. */
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
/* SQINCP/UQINCP/SQDECP/UQDECP (scalar, 32-bit): saturating add or
 * subtract of the count, with 32-bit saturation bounds. */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

/* As above, with 64-bit saturation bounds. */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

/* Saturating INCP/DECP (vector). */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
/*
 *** SVE Integer Compare Scalars Group
 */

/* CTERMEQ/CTERMNE: compare scalars and set NZCV for loop termination.
 * N is the comparison result; V is computed so that the N==V and N!=V
 * branch conditions test the termination state.  Z and C are left as
 * set by a previous flag-setting instruction (per the architecture,
 * C is expected to be set by e.g. a preceding WHILE/PTEST).
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
/* WHILE<cond>: construct a predicate of the leading elements for which
 * the loop condition between Xn and Xm holds, and set NZCV.  The
 * condition is reduced to an element count which the out-of-line
 * helper turns into a predicate.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For 32-bit forms, extend the operands per signedness. */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_const_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, t3);
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, t3);
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3708 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3710 TCGv_i64 op0, op1, diff, t1, tmax;
3711 TCGv_i32 t2, t3;
3712 TCGv_ptr ptr;
3713 unsigned vsz = vec_full_reg_size(s);
3714 unsigned desc = 0;
3716 if (!dc_isar_feature(aa64_sve2, s)) {
3717 return false;
3719 if (!sve_access_check(s)) {
3720 return true;
3723 op0 = read_cpu_reg(s, a->rn, 1);
3724 op1 = read_cpu_reg(s, a->rm, 1);
3726 tmax = tcg_const_i64(vsz);
3727 diff = tcg_temp_new_i64();
3729 if (a->rw) {
3730 /* WHILERW */
3731 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3732 t1 = tcg_temp_new_i64();
3733 tcg_gen_sub_i64(diff, op0, op1);
3734 tcg_gen_sub_i64(t1, op1, op0);
3735 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3736 tcg_temp_free_i64(t1);
3737 /* Round down to a multiple of ESIZE. */
3738 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3739 /* If op1 == op0, diff == 0, and the condition is always true. */
3740 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3741 } else {
3742 /* WHILEWR */
3743 tcg_gen_sub_i64(diff, op1, op0);
3744 /* Round down to a multiple of ESIZE. */
3745 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3746 /* If op0 >= op1, diff <= 0, the condition is always true. */
3747 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3750 /* Bound to the maximum. */
3751 tcg_gen_umin_i64(diff, diff, tmax);
3752 tcg_temp_free_i64(tmax);
3754 /* Since we're bounded, pass as a 32-bit type. */
3755 t2 = tcg_temp_new_i32();
3756 tcg_gen_extrl_i64_i32(t2, diff);
3757 tcg_temp_free_i64(diff);
3759 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3760 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3761 t3 = tcg_const_i32(desc);
3763 ptr = tcg_temp_new_ptr();
3764 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3766 gen_helper_sve_whilel(t2, ptr, t2, t3);
3767 do_pred_flags(t2);
3769 tcg_temp_free_ptr(ptr);
3770 tcg_temp_free_i32(t2);
3771 tcg_temp_free_i32(t3);
3772 return true;
3776 *** SVE Integer Wide Immediate - Unpredicated Group
3779 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3781 if (a->esz == 0) {
3782 return false;
3784 if (sve_access_check(s)) {
3785 unsigned vsz = vec_full_reg_size(s);
3786 int dofs = vec_full_reg_offset(s, a->rd);
3787 uint64_t imm;
3789 /* Decode the VFP immediate. */
3790 imm = vfp_expand_imm(a->esz, a->imm);
3791 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3793 return true;
3796 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3798 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3799 return false;
3801 if (sve_access_check(s)) {
3802 unsigned vsz = vec_full_reg_size(s);
3803 int dofs = vec_full_reg_offset(s, a->rd);
3805 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3807 return true;
3810 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3812 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3813 return false;
3815 if (sve_access_check(s)) {
3816 unsigned vsz = vec_full_reg_size(s);
3817 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3818 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3820 return true;
/* SUB (vector, immediate): implemented as ADD of the negated immediate. */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;   /* NOTE: mutates the decoded arg before reuse. */
    return trans_ADD_zzi(s, a);
}
/*
 * SUBR (vector, immediate): Zd = #imm - Zn.
 * Uses the generic 2-operand-with-scalar expander; .scalar_first places
 * the immediate on the left of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    /* One expansion recipe per element size. */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* esz == 0 with insn bit 13 set is an unallocated encoding. */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3874 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3876 if (sve_access_check(s)) {
3877 unsigned vsz = vec_full_reg_size(s);
3878 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3879 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3881 return true;
3884 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3886 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3887 return false;
3889 if (sve_access_check(s)) {
3890 TCGv_i64 val = tcg_const_i64(a->imm);
3891 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3892 tcg_temp_free_i64(val);
3894 return true;
/* SQADD (immediate): signed saturating add. */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

/* UQADD (immediate): unsigned saturating add. */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

/* SQSUB (immediate): signed saturating subtract. */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

/* UQSUB (immediate): unsigned saturating subtract. */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3917 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3919 if (sve_access_check(s)) {
3920 unsigned vsz = vec_full_reg_size(s);
3921 TCGv_i64 c = tcg_const_i64(a->imm);
3923 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3924 vec_full_reg_offset(s, a->rn),
3925 c, vsz, vsz, 0, fn);
3926 tcg_temp_free_i64(c);
3928 return true;
/*
 * Generate the SMAX/UMAX/SMIN/UMIN (immediate) translators: one helper
 * per element size, dispatched on a->esz.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
/*
 * SDOT/UDOT (vectors): dot product accumulating into Zd.
 * fns is indexed by [unsigned][size]; the _b helpers take byte inputs,
 * the _h helpers halfword inputs.
 */
static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
3962 * SVE Multiply - Indexed
3965 static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3966 gen_helper_gvec_4 *fn)
3968 if (fn == NULL) {
3969 return false;
3971 if (sve_access_check(s)) {
3972 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
3974 return true;
/*
 * Indexed dot products.  The _s forms use byte-input helpers and the
 * _d forms halfword-input helpers (the suffix names the accumulator size).
 */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)

/* SUDOT (indexed): requires the I8MM extension. */
static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
}

/* USDOT (indexed): requires the I8MM extension. */
static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
}

#undef DO_RRXR
4004 static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
4005 gen_helper_gvec_3 *fn)
4007 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4008 return false;
4010 if (sve_access_check(s)) {
4011 unsigned vsz = vec_full_reg_size(s);
4012 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
4013 vec_full_reg_offset(s, rn),
4014 vec_full_reg_offset(s, rm),
4015 vsz, vsz, data, fn);
4017 return true;
/* SVE2 indexed multiplies: the element index is the descriptor data. */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

/*
 * SVE2 indexed long multiplies: bit 0 of the descriptor data selects
 * the top (odd) or bottom (even) source elements.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
    {                                                           \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
                                (a->index << 1) | TOP, FUNC);   \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
4062 static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
4063 int data, gen_helper_gvec_4 *fn)
4065 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4066 return false;
4068 if (sve_access_check(s)) {
4069 unsigned vsz = vec_full_reg_size(s);
4070 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
4071 vec_full_reg_offset(s, rn),
4072 vec_full_reg_offset(s, rm),
4073 vec_full_reg_offset(s, ra),
4074 vsz, vsz, data, fn);
4076 return true;
/* SVE2 indexed multiply-accumulate: index passed as descriptor data. */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

/*
 * SVE2 indexed long multiply-accumulate: bit 0 of the data selects the
 * top/bottom source elements.  Note the destination rd doubles as the
 * accumulator (passed as ra).
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
    {                                                           \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC);  \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

/*
 * SVE2 indexed complex multiply-accumulate: the rotation is packed into
 * the low two bits of the descriptor data, below the index.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
4159 *** SVE Floating Point Multiply-Add Indexed Group
/*
 * FMLA/FMLS (indexed): fused multiply-add with one indexed element.
 * sub is packed into bit 0 of the descriptor data, below the index.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    /* Indexed by esz - 1: no byte-sized FP elements. */
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* FMLA (indexed). */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

/* FMLS (indexed): same expander with the subtract bit set. */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
4195 *** SVE Floating Point Multiply Indexed Group
4198 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
4200 static gen_helper_gvec_3_ptr * const fns[3] = {
4201 gen_helper_gvec_fmul_idx_h,
4202 gen_helper_gvec_fmul_idx_s,
4203 gen_helper_gvec_fmul_idx_d,
4206 if (sve_access_check(s)) {
4207 unsigned vsz = vec_full_reg_size(s);
4208 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4209 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4210 vec_full_reg_offset(s, a->rn),
4211 vec_full_reg_offset(s, a->rm),
4212 status, vsz, vsz, a->index, fns[a->esz - 1]);
4213 tcg_temp_free_ptr(status);
4215 return true;
4219 *** SVE Floating Point Fast Reduction Group
/* Signature of the per-size FP horizontal-reduction helpers. */
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an FP horizontal reduction across Zn under predicate Pg,
 * writing the scalar result to Dd.  The descriptor's data field carries
 * the power-of-two-rounded vector size used by the helper's reduction
 * tree.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
/*
 * Generate the FP fast-reduction translators; esz == 0 (bytes) is
 * rejected since there are no byte-sized FP elements.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)             \
{                                                                     \
    static gen_helper_fp_reduce * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                    \
        gen_helper_sve_##name##_s,                                    \
        gen_helper_sve_##name##_d,                                    \
    };                                                                \
    if (a->esz == 0) {                                                \
        return false;                                                 \
    }                                                                 \
    if (sve_access_check(s)) {                                        \
        do_reduce(s, a, fns[a->esz - 1]);                             \
    }                                                                 \
    return true;                                                      \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4276 *** SVE Floating Point Unary Operations - Unpredicated Group
4279 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4281 unsigned vsz = vec_full_reg_size(s);
4282 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4284 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4285 vec_full_reg_offset(s, a->rn),
4286 status, vsz, vsz, 0, fn);
4287 tcg_temp_free_ptr(status);
4290 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
4292 static gen_helper_gvec_2_ptr * const fns[3] = {
4293 gen_helper_gvec_frecpe_h,
4294 gen_helper_gvec_frecpe_s,
4295 gen_helper_gvec_frecpe_d,
4297 if (a->esz == 0) {
4298 return false;
4300 if (sve_access_check(s)) {
4301 do_zz_fp(s, a, fns[a->esz - 1]);
4303 return true;
4306 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
4308 static gen_helper_gvec_2_ptr * const fns[3] = {
4309 gen_helper_gvec_frsqrte_h,
4310 gen_helper_gvec_frsqrte_s,
4311 gen_helper_gvec_frsqrte_d,
4313 if (a->esz == 0) {
4314 return false;
4316 if (sve_access_check(s)) {
4317 do_zz_fp(s, a, fns[a->esz - 1]);
4319 return true;
4323 *** SVE Floating Point Compare with Zero Group
4326 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4327 gen_helper_gvec_3_ptr *fn)
4329 unsigned vsz = vec_full_reg_size(s);
4330 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4332 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4333 vec_full_reg_offset(s, a->rn),
4334 pred_full_reg_offset(s, a->pg),
4335 status, vsz, vsz, 0, fn);
4336 tcg_temp_free_ptr(status);
/* Generate the FP compare-with-zero translators; bytes are rejected. */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4366 *** SVE floating-point trig multiply-add coefficient
/*
 * FTMAD: trig multiply-add coefficient; the coefficient selector imm
 * is passed as descriptor data.  No byte-sized FP elements.
 */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4393 *** SVE Floating Point Accumulating Reduction Group
/*
 * FADDA: strictly-ordered FP add reduction.  The scalar accumulator is
 * element 0 of Zn; the helper folds in the active elements of Zm and
 * the result is written back as a scalar to Dd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        /* No byte-sized FP elements. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of Zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4438 *** SVE Floating Point Arithmetic - Unpredicated Group
4441 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4442 gen_helper_gvec_3_ptr *fn)
4444 if (fn == NULL) {
4445 return false;
4447 if (sve_access_check(s)) {
4448 unsigned vsz = vec_full_reg_size(s);
4449 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4450 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4451 vec_full_reg_offset(s, a->rn),
4452 vec_full_reg_offset(s, a->rm),
4453 status, vsz, vsz, 0, fn);
4454 tcg_temp_free_ptr(status);
4456 return true;
/*
 * Generate the unpredicated 3-register FP translators; the NULL slot
 * for esz == 0 makes do_zzz_fp reject byte-sized elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4480 *** SVE Floating Point Arithmetic - Predicated Group
4483 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4484 gen_helper_gvec_4_ptr *fn)
4486 if (fn == NULL) {
4487 return false;
4489 if (sve_access_check(s)) {
4490 unsigned vsz = vec_full_reg_size(s);
4491 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4492 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4493 vec_full_reg_offset(s, a->rn),
4494 vec_full_reg_offset(s, a->rm),
4495 pred_full_reg_offset(s, a->pg),
4496 status, vsz, vsz, 0, fn);
4497 tcg_temp_free_ptr(status);
4499 return true;
/*
 * Generate the predicated 3-register FP translators; the NULL slot for
 * esz == 0 makes do_zpzz_fp reject byte-sized elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
/* Signature of the predicated vector-with-FP-scalar helpers. */
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated operation between vector Zn and FP scalar,
 * writing to Zd.  is_fp16 selects the half-precision fpstatus.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
4554 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4555 gen_helper_sve_fp2scalar *fn)
4557 TCGv_i64 temp = tcg_const_i64(imm);
4558 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4559 tcg_temp_free_i64(temp);
/*
 * Generate the FP arithmetic-with-immediate translators.  Each insn
 * encodes a 1-bit immediate selecting one of two constants (const0/
 * const1), looked up per element size.  Bytes are rejected.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4595 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4596 gen_helper_gvec_4_ptr *fn)
4598 if (fn == NULL) {
4599 return false;
4601 if (sve_access_check(s)) {
4602 unsigned vsz = vec_full_reg_size(s);
4603 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4604 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4605 vec_full_reg_offset(s, a->rn),
4606 vec_full_reg_offset(s, a->rm),
4607 pred_full_reg_offset(s, a->pg),
4608 status, vsz, vsz, 0, fn);
4609 tcg_temp_free_ptr(status);
4611 return true;
/*
 * Generate the predicated FP vector-compare translators; the NULL slot
 * for esz == 0 makes do_fp_cmp reject byte-sized elements.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)   \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fp_cmp(s, a, fns[a->esz]);                            \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
/*
 * FCADD: FP complex add with rotation; the rotation selector is passed
 * as descriptor data.  No byte-sized FP elements.
 */
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * Expand a predicated FP fused multiply-add (Zd = Za +/- Zn * Zm
 * variants) via a 5-operand helper.  No byte-sized FP elements.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
                    gen_helper_gvec_5_ptr *fn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Generate the predicated FP multiply-add translators. */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)         \
{                                                                   \
    static gen_helper_gvec_5_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_fmla(s, a, fns[a->esz]);                              \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
/*
 * FCMLA (vectors): predicated FP complex multiply-add; the rotation is
 * passed as descriptor data.  The NULL slot rejects byte elements.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * FCMLA (indexed): half- and single-precision only; index and rotation
 * are packed together into the descriptor data (index * 4 + rot).
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    /* The decoder only produces these forms; rd == ra by construction. */
    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4746 *** SVE Floating Point Unary Operations Predicated Group
4749 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4750 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4752 if (sve_access_check(s)) {
4753 unsigned vsz = vec_full_reg_size(s);
4754 TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
4755 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4756 vec_full_reg_offset(s, rn),
4757 pred_full_reg_offset(s, pg),
4758 status, vsz, vsz, 0, fn);
4759 tcg_temp_free_ptr(status);
4761 return true;
/*
 * FCVT / FCVTZS / FCVTZU conversions.  Helper suffixes name the
 * source/destination sizes (h=half, s=single, d=double); the boolean
 * selects FPST_FPCR_F16 for the conversions whose rounding is governed
 * by the half-precision FP status.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

/* BFCVT requires the BF16 extension. */
static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
/* Round-to-integral helpers, indexed by esz - 1 (H, S, D).
 * Shared by FRINTI and the rounding-mode variants below.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral, using the current FPCR rounding mode. */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        /* Byte elements are not valid for FP operations.  */
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
/* FRINTX: round to integral, raising Inexact for non-integral inputs. */
static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frintx_h,
        gen_helper_sve_frintx_s,
        gen_helper_sve_frintx_d
    };
    if (a->esz == 0) {
        /* Byte elements are not valid for FP operations.  */
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
/*
 * Expand a round-to-integral operation with an explicit rounding MODE,
 * temporarily overriding the rounding mode held in the FP status.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        /* Install the new rounding mode; the old one is returned in tmode. */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the saved rounding mode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4922 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4924 if (a->esz == 0) {
4925 return false;
4927 return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
4930 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4932 if (a->esz == 0) {
4933 return false;
4935 return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
4938 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4940 if (a->esz == 0) {
4941 return false;
4943 return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
4946 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4948 if (a->esz == 0) {
4949 return false;
4951 return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
4954 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4956 if (a->esz == 0) {
4957 return false;
4959 return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
4962 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4964 static gen_helper_gvec_3_ptr * const fns[3] = {
4965 gen_helper_sve_frecpx_h,
4966 gen_helper_sve_frecpx_s,
4967 gen_helper_sve_frecpx_d
4969 if (a->esz == 0) {
4970 return false;
4972 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4975 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4977 static gen_helper_gvec_3_ptr * const fns[3] = {
4978 gen_helper_sve_fsqrt_h,
4979 gen_helper_sve_fsqrt_s,
4980 gen_helper_sve_fsqrt_d
4982 if (a->esz == 0) {
4983 return false;
4985 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4988 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4990 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4993 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4995 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4998 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
5000 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
5003 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5005 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
5008 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5010 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
5013 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5015 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
5018 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5020 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
5023 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
5025 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
5028 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
5030 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
5033 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
5035 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
5038 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5040 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
5043 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5045 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
5048 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5050 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
5053 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5055 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
5059 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte chunks, plus one if there is a sub-8-byte tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    /* MTE-check the whole [addr, addr+len) range up front. */
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small vector: unroll the 8-byte loads inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large vector: emit a TCG-level loop over the 8-byte chunks. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store to cpu_env at offset vofs + i. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load plus a 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
/* Similarly for stores: write a vector register at VOFS of LEN bytes
 * to the address Rn + IMM.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Number of 8-byte chunks, plus one if there is a sub-8-byte tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    /* MTE-check the whole [addr, addr+len) range up front. */
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small vector: unroll the 8-byte stores inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large vector: emit a TCG-level loop over the 8-byte chunks. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from cpu_env at offset vofs + i. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2. */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) maps 2/4/8 to MO_16/MO_32/MO_64. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store plus a 2-byte store of the high half. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
5238 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
5240 if (sve_access_check(s)) {
5241 int size = vec_full_reg_size(s);
5242 int off = vec_full_reg_offset(s, a->rd);
5243 do_ldr(s, off, size, a->rn, a->imm * size);
5245 return true;
5248 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
5250 if (sve_access_check(s)) {
5251 int size = pred_full_reg_size(s);
5252 int off = pred_full_reg_offset(s, a->rd);
5253 do_ldr(s, off, size, a->rn, a->imm * size);
5255 return true;
5258 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5260 if (sve_access_check(s)) {
5261 int size = vec_full_reg_size(s);
5262 int off = vec_full_reg_offset(s, a->rd);
5263 do_str(s, off, size, a->rn, a->imm * size);
5265 return true;
5268 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5270 if (sve_access_check(s)) {
5271 int size = pred_full_reg_size(s);
5272 int off = pred_full_reg_offset(s, a->rd);
5273 do_str(s, off, size, a->rn, a->imm * size);
5275 return true;
5279 *** SVE Memory - Contiguous Load Group
/* The memory mode of the dtype.  The 4-bit dtype field from the insn
 * encodes both the memory element size/signedness and the vector
 * element size for the contiguous load instructions.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* Extract just the memory element size (log2 bytes) from the dtype. */
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype (log2 bytes). */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
5300 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5301 int dtype, uint32_t mte_n, bool is_write,
5302 gen_helper_gvec_mem *fn)
5304 unsigned vsz = vec_full_reg_size(s);
5305 TCGv_ptr t_pg;
5306 TCGv_i32 t_desc;
5307 int desc = 0;
5310 * For e.g. LD4, there are not enough arguments to pass all 4
5311 * registers as pointers, so encode the regno into the data field.
5312 * For consistency, do this even for LD1.
5314 if (s->mte_active[0]) {
5315 int msz = dtype_msz(dtype);
5317 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5318 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5319 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5320 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5321 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
5322 desc <<= SVE_MTEDESC_SHIFT;
5323 } else {
5324 addr = clean_data_tbi(s, addr);
5327 desc = simd_desc(vsz, vsz, zt | desc);
5328 t_desc = tcg_const_i32(desc);
5329 t_pg = tcg_temp_new_ptr();
5331 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5332 fn(cpu_env, t_pg, addr, t_desc);
5334 tcg_temp_free_ptr(t_pg);
5335 tcg_temp_free_i32(t_desc);
/* Contiguous load helpers, indexed by [mte][be][dtype][nreg].
 * NULL entries are dtype/nreg combinations with no instruction encoding
 * (multi-register loads exist only for the "same size" dtypes).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5457 static void do_ld_zpa(DisasContext *s, int zt, int pg,
5458 TCGv_i64 addr, int dtype, int nreg)
5460 gen_helper_gvec_mem *fn
5461 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
5464 * While there are holes in the table, they are not
5465 * accessible via the instruction encoding.
5467 assert(fn != NULL);
5468 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
5471 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
5473 if (a->rm == 31) {
5474 return false;
5476 if (sve_access_check(s)) {
5477 TCGv_i64 addr = new_tmp_a64(s);
5478 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5479 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5480 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5482 return true;
5485 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
5487 if (sve_access_check(s)) {
5488 int vsz = vec_full_reg_size(s);
5489 int elements = vsz >> dtype_esz[a->dtype];
5490 TCGv_i64 addr = new_tmp_a64(s);
5492 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5493 (a->imm * elements * (a->nreg + 1))
5494 << dtype_msz(a->dtype));
5495 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5497 return true;
/* LDFF1 (first-fault contiguous load), scalar plus scalar. */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* Helpers indexed by [mte][be][dtype]; first-fault has no multi-reg. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/* LDNF1 (non-fault contiguous load), scalar plus immediate. */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /* Helpers indexed by [mte][be][dtype]; non-fault has no multi-reg. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
/*
 * Expand LD1RQ: load one quadword (16 bytes) under predicate control,
 * then replicate it across the whole vector register.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers. */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        /* Stage the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* simd_desc(16, 16, ...): run the helper with VL forced to 16 bytes. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword. */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}
5741 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
5743 if (a->rm == 31) {
5744 return false;
5746 if (sve_access_check(s)) {
5747 int msz = dtype_msz(a->dtype);
5748 TCGv_i64 addr = new_tmp_a64(s);
5749 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5750 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5751 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5753 return true;
5756 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
5758 if (sve_access_check(s)) {
5759 TCGv_i64 addr = new_tmp_a64(s);
5760 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5761 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5763 return true;
/*
 * Expand LD1RO: load one octaword (32 bytes) under predicate control,
 * then replicate it across the vector in 32-byte units.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers. */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stage the truncated predicate in the scratch slot. */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    /* simd_desc(32, 32, ...): run the helper with VL forced to 32 bytes. */
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5829 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5831 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5832 return false;
5834 if (a->rm == 31) {
5835 return false;
5837 if (sve_access_check(s)) {
5838 TCGv_i64 addr = new_tmp_a64(s);
5839 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5840 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5841 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5843 return true;
5846 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5848 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5849 return false;
5851 if (sve_access_check(s)) {
5852 TCGv_i64 addr = new_tmp_a64(s);
5853 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5854 do_ldro(s, a->rd, a->pg, addr, a->dtype);
5856 return true;
/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs. */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: search for any active element. */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data. */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements. */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements. */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5911 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5912 int msz, int esz, int nreg)
5914 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5915 { { { gen_helper_sve_st1bb_r,
5916 gen_helper_sve_st1bh_r,
5917 gen_helper_sve_st1bs_r,
5918 gen_helper_sve_st1bd_r },
5919 { NULL,
5920 gen_helper_sve_st1hh_le_r,
5921 gen_helper_sve_st1hs_le_r,
5922 gen_helper_sve_st1hd_le_r },
5923 { NULL, NULL,
5924 gen_helper_sve_st1ss_le_r,
5925 gen_helper_sve_st1sd_le_r },
5926 { NULL, NULL, NULL,
5927 gen_helper_sve_st1dd_le_r } },
5928 { { gen_helper_sve_st1bb_r,
5929 gen_helper_sve_st1bh_r,
5930 gen_helper_sve_st1bs_r,
5931 gen_helper_sve_st1bd_r },
5932 { NULL,
5933 gen_helper_sve_st1hh_be_r,
5934 gen_helper_sve_st1hs_be_r,
5935 gen_helper_sve_st1hd_be_r },
5936 { NULL, NULL,
5937 gen_helper_sve_st1ss_be_r,
5938 gen_helper_sve_st1sd_be_r },
5939 { NULL, NULL, NULL,
5940 gen_helper_sve_st1dd_be_r } } },
5942 { { { gen_helper_sve_st1bb_r_mte,
5943 gen_helper_sve_st1bh_r_mte,
5944 gen_helper_sve_st1bs_r_mte,
5945 gen_helper_sve_st1bd_r_mte },
5946 { NULL,
5947 gen_helper_sve_st1hh_le_r_mte,
5948 gen_helper_sve_st1hs_le_r_mte,
5949 gen_helper_sve_st1hd_le_r_mte },
5950 { NULL, NULL,
5951 gen_helper_sve_st1ss_le_r_mte,
5952 gen_helper_sve_st1sd_le_r_mte },
5953 { NULL, NULL, NULL,
5954 gen_helper_sve_st1dd_le_r_mte } },
5955 { { gen_helper_sve_st1bb_r_mte,
5956 gen_helper_sve_st1bh_r_mte,
5957 gen_helper_sve_st1bs_r_mte,
5958 gen_helper_sve_st1bd_r_mte },
5959 { NULL,
5960 gen_helper_sve_st1hh_be_r_mte,
5961 gen_helper_sve_st1hs_be_r_mte,
5962 gen_helper_sve_st1hd_be_r_mte },
5963 { NULL, NULL,
5964 gen_helper_sve_st1ss_be_r_mte,
5965 gen_helper_sve_st1sd_be_r_mte },
5966 { NULL, NULL, NULL,
5967 gen_helper_sve_st1dd_be_r_mte } } },
5969 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5970 { { { gen_helper_sve_st2bb_r,
5971 gen_helper_sve_st2hh_le_r,
5972 gen_helper_sve_st2ss_le_r,
5973 gen_helper_sve_st2dd_le_r },
5974 { gen_helper_sve_st3bb_r,
5975 gen_helper_sve_st3hh_le_r,
5976 gen_helper_sve_st3ss_le_r,
5977 gen_helper_sve_st3dd_le_r },
5978 { gen_helper_sve_st4bb_r,
5979 gen_helper_sve_st4hh_le_r,
5980 gen_helper_sve_st4ss_le_r,
5981 gen_helper_sve_st4dd_le_r } },
5982 { { gen_helper_sve_st2bb_r,
5983 gen_helper_sve_st2hh_be_r,
5984 gen_helper_sve_st2ss_be_r,
5985 gen_helper_sve_st2dd_be_r },
5986 { gen_helper_sve_st3bb_r,
5987 gen_helper_sve_st3hh_be_r,
5988 gen_helper_sve_st3ss_be_r,
5989 gen_helper_sve_st3dd_be_r },
5990 { gen_helper_sve_st4bb_r,
5991 gen_helper_sve_st4hh_be_r,
5992 gen_helper_sve_st4ss_be_r,
5993 gen_helper_sve_st4dd_be_r } } },
5994 { { { gen_helper_sve_st2bb_r_mte,
5995 gen_helper_sve_st2hh_le_r_mte,
5996 gen_helper_sve_st2ss_le_r_mte,
5997 gen_helper_sve_st2dd_le_r_mte },
5998 { gen_helper_sve_st3bb_r_mte,
5999 gen_helper_sve_st3hh_le_r_mte,
6000 gen_helper_sve_st3ss_le_r_mte,
6001 gen_helper_sve_st3dd_le_r_mte },
6002 { gen_helper_sve_st4bb_r_mte,
6003 gen_helper_sve_st4hh_le_r_mte,
6004 gen_helper_sve_st4ss_le_r_mte,
6005 gen_helper_sve_st4dd_le_r_mte } },
6006 { { gen_helper_sve_st2bb_r_mte,
6007 gen_helper_sve_st2hh_be_r_mte,
6008 gen_helper_sve_st2ss_be_r_mte,
6009 gen_helper_sve_st2dd_be_r_mte },
6010 { gen_helper_sve_st3bb_r_mte,
6011 gen_helper_sve_st3hh_be_r_mte,
6012 gen_helper_sve_st3ss_be_r_mte,
6013 gen_helper_sve_st3dd_be_r_mte },
6014 { gen_helper_sve_st4bb_r_mte,
6015 gen_helper_sve_st4hh_be_r_mte,
6016 gen_helper_sve_st4ss_be_r_mte,
6017 gen_helper_sve_st4dd_be_r_mte } } },
6019 gen_helper_gvec_mem *fn;
6020 int be = s->be_data == MO_BE;
6022 if (nreg == 0) {
6023 /* ST1 */
6024 fn = fn_single[s->mte_active[0]][be][msz][esz];
6025 nreg = 1;
6026 } else {
6027 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
6028 assert(msz == esz);
6029 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
6031 assert(fn != NULL);
6032 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
6035 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
6037 if (a->rm == 31 || a->msz > a->esz) {
6038 return false;
6040 if (sve_access_check(s)) {
6041 TCGv_i64 addr = new_tmp_a64(s);
6042 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
6043 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6044 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6046 return true;
6049 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
6051 if (a->msz > a->esz) {
6052 return false;
6054 if (sve_access_check(s)) {
6055 int vsz = vec_full_reg_size(s);
6056 int elements = vsz >> a->esz;
6057 TCGv_i64 addr = new_tmp_a64(s);
6059 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6060 (a->imm * elements * (a->nreg + 1)) << a->msz);
6061 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6063 return true;
6067 *** SVE gather loads / scatter stores
6070 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
6071 int scale, TCGv_i64 scalar, int msz, bool is_write,
6072 gen_helper_gvec_mem_scatter *fn)
6074 unsigned vsz = vec_full_reg_size(s);
6075 TCGv_ptr t_zm = tcg_temp_new_ptr();
6076 TCGv_ptr t_pg = tcg_temp_new_ptr();
6077 TCGv_ptr t_zt = tcg_temp_new_ptr();
6078 TCGv_i32 t_desc;
6079 int desc = 0;
6081 if (s->mte_active[0]) {
6082 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
6083 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
6084 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
6085 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
6086 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
6087 desc <<= SVE_MTEDESC_SHIFT;
6089 desc = simd_desc(vsz, vsz, desc | scale);
6090 t_desc = tcg_const_i32(desc);
6092 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
6093 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
6094 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
6095 fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
6097 tcg_temp_free_ptr(t_zt);
6098 tcg_temp_free_ptr(t_zm);
6099 tcg_temp_free_ptr(t_pg);
6100 tcg_temp_free_i32(t_desc);
6103 /* Indexed by [mte][be][ff][xs][u][msz]. */
6104 static gen_helper_gvec_mem_scatter * const
6105 gather_load_fn32[2][2][2][2][2][3] = {
6106 { /* MTE Inactive */
6107 { /* Little-endian */
6108 { { { gen_helper_sve_ldbss_zsu,
6109 gen_helper_sve_ldhss_le_zsu,
6110 NULL, },
6111 { gen_helper_sve_ldbsu_zsu,
6112 gen_helper_sve_ldhsu_le_zsu,
6113 gen_helper_sve_ldss_le_zsu, } },
6114 { { gen_helper_sve_ldbss_zss,
6115 gen_helper_sve_ldhss_le_zss,
6116 NULL, },
6117 { gen_helper_sve_ldbsu_zss,
6118 gen_helper_sve_ldhsu_le_zss,
6119 gen_helper_sve_ldss_le_zss, } } },
6121 /* First-fault */
6122 { { { gen_helper_sve_ldffbss_zsu,
6123 gen_helper_sve_ldffhss_le_zsu,
6124 NULL, },
6125 { gen_helper_sve_ldffbsu_zsu,
6126 gen_helper_sve_ldffhsu_le_zsu,
6127 gen_helper_sve_ldffss_le_zsu, } },
6128 { { gen_helper_sve_ldffbss_zss,
6129 gen_helper_sve_ldffhss_le_zss,
6130 NULL, },
6131 { gen_helper_sve_ldffbsu_zss,
6132 gen_helper_sve_ldffhsu_le_zss,
6133 gen_helper_sve_ldffss_le_zss, } } } },
6135 { /* Big-endian */
6136 { { { gen_helper_sve_ldbss_zsu,
6137 gen_helper_sve_ldhss_be_zsu,
6138 NULL, },
6139 { gen_helper_sve_ldbsu_zsu,
6140 gen_helper_sve_ldhsu_be_zsu,
6141 gen_helper_sve_ldss_be_zsu, } },
6142 { { gen_helper_sve_ldbss_zss,
6143 gen_helper_sve_ldhss_be_zss,
6144 NULL, },
6145 { gen_helper_sve_ldbsu_zss,
6146 gen_helper_sve_ldhsu_be_zss,
6147 gen_helper_sve_ldss_be_zss, } } },
6149 /* First-fault */
6150 { { { gen_helper_sve_ldffbss_zsu,
6151 gen_helper_sve_ldffhss_be_zsu,
6152 NULL, },
6153 { gen_helper_sve_ldffbsu_zsu,
6154 gen_helper_sve_ldffhsu_be_zsu,
6155 gen_helper_sve_ldffss_be_zsu, } },
6156 { { gen_helper_sve_ldffbss_zss,
6157 gen_helper_sve_ldffhss_be_zss,
6158 NULL, },
6159 { gen_helper_sve_ldffbsu_zss,
6160 gen_helper_sve_ldffhsu_be_zss,
6161 gen_helper_sve_ldffss_be_zss, } } } } },
6162 { /* MTE Active */
6163 { /* Little-endian */
6164 { { { gen_helper_sve_ldbss_zsu_mte,
6165 gen_helper_sve_ldhss_le_zsu_mte,
6166 NULL, },
6167 { gen_helper_sve_ldbsu_zsu_mte,
6168 gen_helper_sve_ldhsu_le_zsu_mte,
6169 gen_helper_sve_ldss_le_zsu_mte, } },
6170 { { gen_helper_sve_ldbss_zss_mte,
6171 gen_helper_sve_ldhss_le_zss_mte,
6172 NULL, },
6173 { gen_helper_sve_ldbsu_zss_mte,
6174 gen_helper_sve_ldhsu_le_zss_mte,
6175 gen_helper_sve_ldss_le_zss_mte, } } },
6177 /* First-fault */
6178 { { { gen_helper_sve_ldffbss_zsu_mte,
6179 gen_helper_sve_ldffhss_le_zsu_mte,
6180 NULL, },
6181 { gen_helper_sve_ldffbsu_zsu_mte,
6182 gen_helper_sve_ldffhsu_le_zsu_mte,
6183 gen_helper_sve_ldffss_le_zsu_mte, } },
6184 { { gen_helper_sve_ldffbss_zss_mte,
6185 gen_helper_sve_ldffhss_le_zss_mte,
6186 NULL, },
6187 { gen_helper_sve_ldffbsu_zss_mte,
6188 gen_helper_sve_ldffhsu_le_zss_mte,
6189 gen_helper_sve_ldffss_le_zss_mte, } } } },
6191 { /* Big-endian */
6192 { { { gen_helper_sve_ldbss_zsu_mte,
6193 gen_helper_sve_ldhss_be_zsu_mte,
6194 NULL, },
6195 { gen_helper_sve_ldbsu_zsu_mte,
6196 gen_helper_sve_ldhsu_be_zsu_mte,
6197 gen_helper_sve_ldss_be_zsu_mte, } },
6198 { { gen_helper_sve_ldbss_zss_mte,
6199 gen_helper_sve_ldhss_be_zss_mte,
6200 NULL, },
6201 { gen_helper_sve_ldbsu_zss_mte,
6202 gen_helper_sve_ldhsu_be_zss_mte,
6203 gen_helper_sve_ldss_be_zss_mte, } } },
6205 /* First-fault */
6206 { { { gen_helper_sve_ldffbss_zsu_mte,
6207 gen_helper_sve_ldffhss_be_zsu_mte,
6208 NULL, },
6209 { gen_helper_sve_ldffbsu_zsu_mte,
6210 gen_helper_sve_ldffhsu_be_zsu_mte,
6211 gen_helper_sve_ldffss_be_zsu_mte, } },
6212 { { gen_helper_sve_ldffbss_zss_mte,
6213 gen_helper_sve_ldffhss_be_zss_mte,
6214 NULL, },
6215 { gen_helper_sve_ldffbsu_zss_mte,
6216 gen_helper_sve_ldffhsu_be_zss_mte,
6217 gen_helper_sve_ldffss_be_zss_mte, } } } } },
6220 /* Note that we overload xs=2 to indicate 64-bit offset. */
6221 static gen_helper_gvec_mem_scatter * const
6222 gather_load_fn64[2][2][2][3][2][4] = {
6223 { /* MTE Inactive */
6224 { /* Little-endian */
6225 { { { gen_helper_sve_ldbds_zsu,
6226 gen_helper_sve_ldhds_le_zsu,
6227 gen_helper_sve_ldsds_le_zsu,
6228 NULL, },
6229 { gen_helper_sve_ldbdu_zsu,
6230 gen_helper_sve_ldhdu_le_zsu,
6231 gen_helper_sve_ldsdu_le_zsu,
6232 gen_helper_sve_lddd_le_zsu, } },
6233 { { gen_helper_sve_ldbds_zss,
6234 gen_helper_sve_ldhds_le_zss,
6235 gen_helper_sve_ldsds_le_zss,
6236 NULL, },
6237 { gen_helper_sve_ldbdu_zss,
6238 gen_helper_sve_ldhdu_le_zss,
6239 gen_helper_sve_ldsdu_le_zss,
6240 gen_helper_sve_lddd_le_zss, } },
6241 { { gen_helper_sve_ldbds_zd,
6242 gen_helper_sve_ldhds_le_zd,
6243 gen_helper_sve_ldsds_le_zd,
6244 NULL, },
6245 { gen_helper_sve_ldbdu_zd,
6246 gen_helper_sve_ldhdu_le_zd,
6247 gen_helper_sve_ldsdu_le_zd,
6248 gen_helper_sve_lddd_le_zd, } } },
6250 /* First-fault */
6251 { { { gen_helper_sve_ldffbds_zsu,
6252 gen_helper_sve_ldffhds_le_zsu,
6253 gen_helper_sve_ldffsds_le_zsu,
6254 NULL, },
6255 { gen_helper_sve_ldffbdu_zsu,
6256 gen_helper_sve_ldffhdu_le_zsu,
6257 gen_helper_sve_ldffsdu_le_zsu,
6258 gen_helper_sve_ldffdd_le_zsu, } },
6259 { { gen_helper_sve_ldffbds_zss,
6260 gen_helper_sve_ldffhds_le_zss,
6261 gen_helper_sve_ldffsds_le_zss,
6262 NULL, },
6263 { gen_helper_sve_ldffbdu_zss,
6264 gen_helper_sve_ldffhdu_le_zss,
6265 gen_helper_sve_ldffsdu_le_zss,
6266 gen_helper_sve_ldffdd_le_zss, } },
6267 { { gen_helper_sve_ldffbds_zd,
6268 gen_helper_sve_ldffhds_le_zd,
6269 gen_helper_sve_ldffsds_le_zd,
6270 NULL, },
6271 { gen_helper_sve_ldffbdu_zd,
6272 gen_helper_sve_ldffhdu_le_zd,
6273 gen_helper_sve_ldffsdu_le_zd,
6274 gen_helper_sve_ldffdd_le_zd, } } } },
6275 { /* Big-endian */
6276 { { { gen_helper_sve_ldbds_zsu,
6277 gen_helper_sve_ldhds_be_zsu,
6278 gen_helper_sve_ldsds_be_zsu,
6279 NULL, },
6280 { gen_helper_sve_ldbdu_zsu,
6281 gen_helper_sve_ldhdu_be_zsu,
6282 gen_helper_sve_ldsdu_be_zsu,
6283 gen_helper_sve_lddd_be_zsu, } },
6284 { { gen_helper_sve_ldbds_zss,
6285 gen_helper_sve_ldhds_be_zss,
6286 gen_helper_sve_ldsds_be_zss,
6287 NULL, },
6288 { gen_helper_sve_ldbdu_zss,
6289 gen_helper_sve_ldhdu_be_zss,
6290 gen_helper_sve_ldsdu_be_zss,
6291 gen_helper_sve_lddd_be_zss, } },
6292 { { gen_helper_sve_ldbds_zd,
6293 gen_helper_sve_ldhds_be_zd,
6294 gen_helper_sve_ldsds_be_zd,
6295 NULL, },
6296 { gen_helper_sve_ldbdu_zd,
6297 gen_helper_sve_ldhdu_be_zd,
6298 gen_helper_sve_ldsdu_be_zd,
6299 gen_helper_sve_lddd_be_zd, } } },
6301 /* First-fault */
6302 { { { gen_helper_sve_ldffbds_zsu,
6303 gen_helper_sve_ldffhds_be_zsu,
6304 gen_helper_sve_ldffsds_be_zsu,
6305 NULL, },
6306 { gen_helper_sve_ldffbdu_zsu,
6307 gen_helper_sve_ldffhdu_be_zsu,
6308 gen_helper_sve_ldffsdu_be_zsu,
6309 gen_helper_sve_ldffdd_be_zsu, } },
6310 { { gen_helper_sve_ldffbds_zss,
6311 gen_helper_sve_ldffhds_be_zss,
6312 gen_helper_sve_ldffsds_be_zss,
6313 NULL, },
6314 { gen_helper_sve_ldffbdu_zss,
6315 gen_helper_sve_ldffhdu_be_zss,
6316 gen_helper_sve_ldffsdu_be_zss,
6317 gen_helper_sve_ldffdd_be_zss, } },
6318 { { gen_helper_sve_ldffbds_zd,
6319 gen_helper_sve_ldffhds_be_zd,
6320 gen_helper_sve_ldffsds_be_zd,
6321 NULL, },
6322 { gen_helper_sve_ldffbdu_zd,
6323 gen_helper_sve_ldffhdu_be_zd,
6324 gen_helper_sve_ldffsdu_be_zd,
6325 gen_helper_sve_ldffdd_be_zd, } } } } },
6326 { /* MTE Active */
6327 { /* Little-endian */
6328 { { { gen_helper_sve_ldbds_zsu_mte,
6329 gen_helper_sve_ldhds_le_zsu_mte,
6330 gen_helper_sve_ldsds_le_zsu_mte,
6331 NULL, },
6332 { gen_helper_sve_ldbdu_zsu_mte,
6333 gen_helper_sve_ldhdu_le_zsu_mte,
6334 gen_helper_sve_ldsdu_le_zsu_mte,
6335 gen_helper_sve_lddd_le_zsu_mte, } },
6336 { { gen_helper_sve_ldbds_zss_mte,
6337 gen_helper_sve_ldhds_le_zss_mte,
6338 gen_helper_sve_ldsds_le_zss_mte,
6339 NULL, },
6340 { gen_helper_sve_ldbdu_zss_mte,
6341 gen_helper_sve_ldhdu_le_zss_mte,
6342 gen_helper_sve_ldsdu_le_zss_mte,
6343 gen_helper_sve_lddd_le_zss_mte, } },
6344 { { gen_helper_sve_ldbds_zd_mte,
6345 gen_helper_sve_ldhds_le_zd_mte,
6346 gen_helper_sve_ldsds_le_zd_mte,
6347 NULL, },
6348 { gen_helper_sve_ldbdu_zd_mte,
6349 gen_helper_sve_ldhdu_le_zd_mte,
6350 gen_helper_sve_ldsdu_le_zd_mte,
6351 gen_helper_sve_lddd_le_zd_mte, } } },
6353 /* First-fault */
6354 { { { gen_helper_sve_ldffbds_zsu_mte,
6355 gen_helper_sve_ldffhds_le_zsu_mte,
6356 gen_helper_sve_ldffsds_le_zsu_mte,
6357 NULL, },
6358 { gen_helper_sve_ldffbdu_zsu_mte,
6359 gen_helper_sve_ldffhdu_le_zsu_mte,
6360 gen_helper_sve_ldffsdu_le_zsu_mte,
6361 gen_helper_sve_ldffdd_le_zsu_mte, } },
6362 { { gen_helper_sve_ldffbds_zss_mte,
6363 gen_helper_sve_ldffhds_le_zss_mte,
6364 gen_helper_sve_ldffsds_le_zss_mte,
6365 NULL, },
6366 { gen_helper_sve_ldffbdu_zss_mte,
6367 gen_helper_sve_ldffhdu_le_zss_mte,
6368 gen_helper_sve_ldffsdu_le_zss_mte,
6369 gen_helper_sve_ldffdd_le_zss_mte, } },
6370 { { gen_helper_sve_ldffbds_zd_mte,
6371 gen_helper_sve_ldffhds_le_zd_mte,
6372 gen_helper_sve_ldffsds_le_zd_mte,
6373 NULL, },
6374 { gen_helper_sve_ldffbdu_zd_mte,
6375 gen_helper_sve_ldffhdu_le_zd_mte,
6376 gen_helper_sve_ldffsdu_le_zd_mte,
6377 gen_helper_sve_ldffdd_le_zd_mte, } } } },
6378 { /* Big-endian */
6379 { { { gen_helper_sve_ldbds_zsu_mte,
6380 gen_helper_sve_ldhds_be_zsu_mte,
6381 gen_helper_sve_ldsds_be_zsu_mte,
6382 NULL, },
6383 { gen_helper_sve_ldbdu_zsu_mte,
6384 gen_helper_sve_ldhdu_be_zsu_mte,
6385 gen_helper_sve_ldsdu_be_zsu_mte,
6386 gen_helper_sve_lddd_be_zsu_mte, } },
6387 { { gen_helper_sve_ldbds_zss_mte,
6388 gen_helper_sve_ldhds_be_zss_mte,
6389 gen_helper_sve_ldsds_be_zss_mte,
6390 NULL, },
6391 { gen_helper_sve_ldbdu_zss_mte,
6392 gen_helper_sve_ldhdu_be_zss_mte,
6393 gen_helper_sve_ldsdu_be_zss_mte,
6394 gen_helper_sve_lddd_be_zss_mte, } },
6395 { { gen_helper_sve_ldbds_zd_mte,
6396 gen_helper_sve_ldhds_be_zd_mte,
6397 gen_helper_sve_ldsds_be_zd_mte,
6398 NULL, },
6399 { gen_helper_sve_ldbdu_zd_mte,
6400 gen_helper_sve_ldhdu_be_zd_mte,
6401 gen_helper_sve_ldsdu_be_zd_mte,
6402 gen_helper_sve_lddd_be_zd_mte, } } },
6404 /* First-fault */
6405 { { { gen_helper_sve_ldffbds_zsu_mte,
6406 gen_helper_sve_ldffhds_be_zsu_mte,
6407 gen_helper_sve_ldffsds_be_zsu_mte,
6408 NULL, },
6409 { gen_helper_sve_ldffbdu_zsu_mte,
6410 gen_helper_sve_ldffhdu_be_zsu_mte,
6411 gen_helper_sve_ldffsdu_be_zsu_mte,
6412 gen_helper_sve_ldffdd_be_zsu_mte, } },
6413 { { gen_helper_sve_ldffbds_zss_mte,
6414 gen_helper_sve_ldffhds_be_zss_mte,
6415 gen_helper_sve_ldffsds_be_zss_mte,
6416 NULL, },
6417 { gen_helper_sve_ldffbdu_zss_mte,
6418 gen_helper_sve_ldffhdu_be_zss_mte,
6419 gen_helper_sve_ldffsdu_be_zss_mte,
6420 gen_helper_sve_ldffdd_be_zss_mte, } },
6421 { { gen_helper_sve_ldffbds_zd_mte,
6422 gen_helper_sve_ldffhds_be_zd_mte,
6423 gen_helper_sve_ldffsds_be_zd_mte,
6424 NULL, },
6425 { gen_helper_sve_ldffbdu_zd_mte,
6426 gen_helper_sve_ldffhdu_be_zd_mte,
6427 gen_helper_sve_ldffsdu_be_zd_mte,
6428 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
6431 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
6433 gen_helper_gvec_mem_scatter *fn = NULL;
6434 bool be = s->be_data == MO_BE;
6435 bool mte = s->mte_active[0];
6437 if (!sve_access_check(s)) {
6438 return true;
6441 switch (a->esz) {
6442 case MO_32:
6443 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
6444 break;
6445 case MO_64:
6446 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
6447 break;
6449 assert(fn != NULL);
6451 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6452 cpu_reg_sp(s, a->rn), a->msz, false, fn);
6453 return true;
6456 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
6458 gen_helper_gvec_mem_scatter *fn = NULL;
6459 bool be = s->be_data == MO_BE;
6460 bool mte = s->mte_active[0];
6461 TCGv_i64 imm;
6463 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6464 return false;
6466 if (!sve_access_check(s)) {
6467 return true;
6470 switch (a->esz) {
6471 case MO_32:
6472 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
6473 break;
6474 case MO_64:
6475 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
6476 break;
6478 assert(fn != NULL);
6480 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6481 * by loading the immediate into the scalar parameter.
6483 imm = tcg_const_i64(a->imm << a->msz);
6484 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
6485 tcg_temp_free_i64(imm);
6486 return true;
6489 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6491 if (!dc_isar_feature(aa64_sve2, s)) {
6492 return false;
6494 return trans_LD1_zprz(s, a);
6497 /* Indexed by [mte][be][xs][msz]. */
6498 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
6499 { /* MTE Inactive */
6500 { /* Little-endian */
6501 { gen_helper_sve_stbs_zsu,
6502 gen_helper_sve_sths_le_zsu,
6503 gen_helper_sve_stss_le_zsu, },
6504 { gen_helper_sve_stbs_zss,
6505 gen_helper_sve_sths_le_zss,
6506 gen_helper_sve_stss_le_zss, } },
6507 { /* Big-endian */
6508 { gen_helper_sve_stbs_zsu,
6509 gen_helper_sve_sths_be_zsu,
6510 gen_helper_sve_stss_be_zsu, },
6511 { gen_helper_sve_stbs_zss,
6512 gen_helper_sve_sths_be_zss,
6513 gen_helper_sve_stss_be_zss, } } },
6514 { /* MTE Active */
6515 { /* Little-endian */
6516 { gen_helper_sve_stbs_zsu_mte,
6517 gen_helper_sve_sths_le_zsu_mte,
6518 gen_helper_sve_stss_le_zsu_mte, },
6519 { gen_helper_sve_stbs_zss_mte,
6520 gen_helper_sve_sths_le_zss_mte,
6521 gen_helper_sve_stss_le_zss_mte, } },
6522 { /* Big-endian */
6523 { gen_helper_sve_stbs_zsu_mte,
6524 gen_helper_sve_sths_be_zsu_mte,
6525 gen_helper_sve_stss_be_zsu_mte, },
6526 { gen_helper_sve_stbs_zss_mte,
6527 gen_helper_sve_sths_be_zss_mte,
6528 gen_helper_sve_stss_be_zss_mte, } } },
6531 /* Note that we overload xs=2 to indicate 64-bit offset. */
6532 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
6533 { /* MTE Inactive */
6534 { /* Little-endian */
6535 { gen_helper_sve_stbd_zsu,
6536 gen_helper_sve_sthd_le_zsu,
6537 gen_helper_sve_stsd_le_zsu,
6538 gen_helper_sve_stdd_le_zsu, },
6539 { gen_helper_sve_stbd_zss,
6540 gen_helper_sve_sthd_le_zss,
6541 gen_helper_sve_stsd_le_zss,
6542 gen_helper_sve_stdd_le_zss, },
6543 { gen_helper_sve_stbd_zd,
6544 gen_helper_sve_sthd_le_zd,
6545 gen_helper_sve_stsd_le_zd,
6546 gen_helper_sve_stdd_le_zd, } },
6547 { /* Big-endian */
6548 { gen_helper_sve_stbd_zsu,
6549 gen_helper_sve_sthd_be_zsu,
6550 gen_helper_sve_stsd_be_zsu,
6551 gen_helper_sve_stdd_be_zsu, },
6552 { gen_helper_sve_stbd_zss,
6553 gen_helper_sve_sthd_be_zss,
6554 gen_helper_sve_stsd_be_zss,
6555 gen_helper_sve_stdd_be_zss, },
6556 { gen_helper_sve_stbd_zd,
6557 gen_helper_sve_sthd_be_zd,
6558 gen_helper_sve_stsd_be_zd,
6559 gen_helper_sve_stdd_be_zd, } } },
6560 { /* MTE Inactive */
6561 { /* Little-endian */
6562 { gen_helper_sve_stbd_zsu_mte,
6563 gen_helper_sve_sthd_le_zsu_mte,
6564 gen_helper_sve_stsd_le_zsu_mte,
6565 gen_helper_sve_stdd_le_zsu_mte, },
6566 { gen_helper_sve_stbd_zss_mte,
6567 gen_helper_sve_sthd_le_zss_mte,
6568 gen_helper_sve_stsd_le_zss_mte,
6569 gen_helper_sve_stdd_le_zss_mte, },
6570 { gen_helper_sve_stbd_zd_mte,
6571 gen_helper_sve_sthd_le_zd_mte,
6572 gen_helper_sve_stsd_le_zd_mte,
6573 gen_helper_sve_stdd_le_zd_mte, } },
6574 { /* Big-endian */
6575 { gen_helper_sve_stbd_zsu_mte,
6576 gen_helper_sve_sthd_be_zsu_mte,
6577 gen_helper_sve_stsd_be_zsu_mte,
6578 gen_helper_sve_stdd_be_zsu_mte, },
6579 { gen_helper_sve_stbd_zss_mte,
6580 gen_helper_sve_sthd_be_zss_mte,
6581 gen_helper_sve_stsd_be_zss_mte,
6582 gen_helper_sve_stdd_be_zss_mte, },
6583 { gen_helper_sve_stbd_zd_mte,
6584 gen_helper_sve_sthd_be_zd_mte,
6585 gen_helper_sve_stsd_be_zd_mte,
6586 gen_helper_sve_stdd_be_zd_mte, } } },
6589 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
6591 gen_helper_gvec_mem_scatter *fn;
6592 bool be = s->be_data == MO_BE;
6593 bool mte = s->mte_active[0];
6595 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6596 return false;
6598 if (!sve_access_check(s)) {
6599 return true;
6601 switch (a->esz) {
6602 case MO_32:
6603 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
6604 break;
6605 case MO_64:
6606 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
6607 break;
6608 default:
6609 g_assert_not_reached();
6611 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6612 cpu_reg_sp(s, a->rn), a->msz, true, fn);
6613 return true;
6616 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
6618 gen_helper_gvec_mem_scatter *fn = NULL;
6619 bool be = s->be_data == MO_BE;
6620 bool mte = s->mte_active[0];
6621 TCGv_i64 imm;
6623 if (a->esz < a->msz) {
6624 return false;
6626 if (!sve_access_check(s)) {
6627 return true;
6630 switch (a->esz) {
6631 case MO_32:
6632 fn = scatter_store_fn32[mte][be][0][a->msz];
6633 break;
6634 case MO_64:
6635 fn = scatter_store_fn64[mte][be][2][a->msz];
6636 break;
6638 assert(fn != NULL);
6640 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6641 * by loading the immediate into the scalar parameter.
6643 imm = tcg_const_i64(a->imm << a->msz);
6644 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
6645 tcg_temp_free_i64(imm);
6646 return true;
6649 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6651 if (!dc_isar_feature(aa64_sve2, s)) {
6652 return false;
6654 return trans_ST1_zprz(s, a);
6658 * Prefetches
6661 static bool trans_PRF(DisasContext *s, arg_PRF *a)
6663 /* Prefetch is a nop within QEMU. */
6664 (void)sve_access_check(s);
6665 return true;
6668 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
6670 if (a->rm == 31) {
6671 return false;
6673 /* Prefetch is a nop within QEMU. */
6674 (void)sve_access_check(s);
6675 return true;
6679 * Move Prefix
6681 * TODO: The implementation so far could handle predicated merging movprfx.
6682 * The helper functions as written take an extra source register to
6683 * use in the operation, but the result is only written when predication
6684 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6685 * to allow the final write back to the destination to be unconditional.
6686 * For predicated zeroing movprfx, we need to rearrange the helpers to
6687 * allow the final write back to zero inactives.
6689 * In the meantime, just emit the moves.
6692 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
6694 return do_mov_z(s, a->rd, a->rn);
6697 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
6699 if (sve_access_check(s)) {
6700 do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6702 return true;
6705 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
6707 return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
6711 * SVE2 Integer Multiply - Unpredicated
6714 static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6716 if (!dc_isar_feature(aa64_sve2, s)) {
6717 return false;
6719 if (sve_access_check(s)) {
6720 gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6722 return true;
6725 static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6726 gen_helper_gvec_3 *fn)
6728 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6729 return false;
6731 if (sve_access_check(s)) {
6732 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6734 return true;
6737 static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6739 static gen_helper_gvec_3 * const fns[4] = {
6740 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
6741 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
6743 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6746 static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6748 static gen_helper_gvec_3 * const fns[4] = {
6749 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
6750 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
6752 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6755 static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
6757 return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
6760 static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6762 static gen_helper_gvec_3 * const fns[4] = {
6763 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
6764 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
6766 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6769 static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
6771 static gen_helper_gvec_3 * const fns[4] = {
6772 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
6773 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
6775 return do_sve2_zzz_ool(s, a, fns[a->esz]);
6779 * SVE2 Integer - Predicated
6782 static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6783 gen_helper_gvec_4 *fn)
6785 if (!dc_isar_feature(aa64_sve2, s)) {
6786 return false;
6788 return do_zpzz_ool(s, a, fn);
6791 static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6793 static gen_helper_gvec_4 * const fns[3] = {
6794 gen_helper_sve2_sadalp_zpzz_h,
6795 gen_helper_sve2_sadalp_zpzz_s,
6796 gen_helper_sve2_sadalp_zpzz_d,
6798 if (a->esz == 0) {
6799 return false;
6801 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6804 static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6806 static gen_helper_gvec_4 * const fns[3] = {
6807 gen_helper_sve2_uadalp_zpzz_h,
6808 gen_helper_sve2_uadalp_zpzz_s,
6809 gen_helper_sve2_uadalp_zpzz_d,
6811 if (a->esz == 0) {
6812 return false;
6814 return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6818 * SVE2 integer unary operations (predicated)
6821 static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6822 gen_helper_gvec_3 *fn)
6824 if (!dc_isar_feature(aa64_sve2, s)) {
6825 return false;
6827 return do_zpz_ool(s, a, fn);
6830 static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6832 if (a->esz != 2) {
6833 return false;
6835 return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6838 static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6840 if (a->esz != 2) {
6841 return false;
6843 return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6846 static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
6848 static gen_helper_gvec_3 * const fns[4] = {
6849 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
6850 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
6852 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6855 static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
6857 static gen_helper_gvec_3 * const fns[4] = {
6858 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
6859 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
6861 return do_sve2_zpz_ool(s, a, fns[a->esz]);
6864 #define DO_SVE2_ZPZZ(NAME, name) \
6865 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
6867 static gen_helper_gvec_4 * const fns[4] = { \
6868 gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
6869 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
6870 }; \
6871 return do_sve2_zpzz_ool(s, a, fns[a->esz]); \
6874 DO_SVE2_ZPZZ(SQSHL, sqshl)
6875 DO_SVE2_ZPZZ(SQRSHL, sqrshl)
6876 DO_SVE2_ZPZZ(SRSHL, srshl)
6878 DO_SVE2_ZPZZ(UQSHL, uqshl)
6879 DO_SVE2_ZPZZ(UQRSHL, uqrshl)
6880 DO_SVE2_ZPZZ(URSHL, urshl)
6882 DO_SVE2_ZPZZ(SHADD, shadd)
6883 DO_SVE2_ZPZZ(SRHADD, srhadd)
6884 DO_SVE2_ZPZZ(SHSUB, shsub)
6886 DO_SVE2_ZPZZ(UHADD, uhadd)
6887 DO_SVE2_ZPZZ(URHADD, urhadd)
6888 DO_SVE2_ZPZZ(UHSUB, uhsub)
6890 DO_SVE2_ZPZZ(ADDP, addp)
6891 DO_SVE2_ZPZZ(SMAXP, smaxp)
6892 DO_SVE2_ZPZZ(UMAXP, umaxp)
6893 DO_SVE2_ZPZZ(SMINP, sminp)
6894 DO_SVE2_ZPZZ(UMINP, uminp)
6896 DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
6897 DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
6898 DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
6899 DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
6900 DO_SVE2_ZPZZ(SUQADD, suqadd)
6901 DO_SVE2_ZPZZ(USQADD, usqadd)
6904 * SVE2 Widening Integer Arithmetic
6907 static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6908 gen_helper_gvec_3 *fn, int data)
6910 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6911 return false;
6913 if (sve_access_check(s)) {
6914 unsigned vsz = vec_full_reg_size(s);
6915 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6916 vec_full_reg_offset(s, a->rn),
6917 vec_full_reg_offset(s, a->rm),
6918 vsz, vsz, data, fn);
6920 return true;
6923 #define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
6924 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
6926 static gen_helper_gvec_3 * const fns[4] = { \
6927 NULL, gen_helper_sve2_##name##_h, \
6928 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
6929 }; \
6930 return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \
6933 DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
6934 DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
6935 DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
6937 DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
6938 DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
6939 DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
6941 DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
6942 DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
6943 DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
6945 DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
6946 DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
6947 DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
6949 DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
6950 DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
6951 DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
6953 DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
6954 DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
6956 DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
6957 DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
6959 DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
6960 DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6962 static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6964 static gen_helper_gvec_3 * const fns[4] = {
6965 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6966 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6968 return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6971 static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
6973 return do_eor_tb(s, a, false);
6976 static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
6978 return do_eor_tb(s, a, true);
6981 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6983 static gen_helper_gvec_3 * const fns[4] = {
6984 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6985 NULL, gen_helper_sve2_pmull_d,
6987 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6988 return false;
6990 return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
6993 static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
6995 return do_trans_pmull(s, a, false);
6998 static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
7000 return do_trans_pmull(s, a, true);
7003 #define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
7004 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
7006 static gen_helper_gvec_3 * const fns[4] = { \
7007 NULL, gen_helper_sve2_##name##_h, \
7008 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7009 }; \
7010 return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
7013 DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
7014 DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
7015 DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
7016 DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
7018 DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
7019 DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
7020 DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
7021 DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
7023 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7025 int top = imm & 1;
7026 int shl = imm >> 1;
7027 int halfbits = 4 << vece;
7029 if (top) {
7030 if (shl == halfbits) {
7031 TCGv_vec t = tcg_temp_new_vec_matching(d);
7032 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7033 tcg_gen_and_vec(vece, d, n, t);
7034 tcg_temp_free_vec(t);
7035 } else {
7036 tcg_gen_sari_vec(vece, d, n, halfbits);
7037 tcg_gen_shli_vec(vece, d, d, shl);
7039 } else {
7040 tcg_gen_shli_vec(vece, d, n, halfbits);
7041 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
7045 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
7047 int halfbits = 4 << vece;
7048 int top = imm & 1;
7049 int shl = (imm >> 1);
7050 int shift;
7051 uint64_t mask;
7053 mask = MAKE_64BIT_MASK(0, halfbits);
7054 mask <<= shl;
7055 mask = dup_const(vece, mask);
7057 shift = shl - top * halfbits;
7058 if (shift < 0) {
7059 tcg_gen_shri_i64(d, n, -shift);
7060 } else {
7061 tcg_gen_shli_i64(d, n, shift);
7063 tcg_gen_andi_i64(d, d, mask);
7066 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7068 gen_ushll_i64(MO_16, d, n, imm);
7071 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7073 gen_ushll_i64(MO_32, d, n, imm);
7076 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
7078 gen_ushll_i64(MO_64, d, n, imm);
7081 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
7083 int halfbits = 4 << vece;
7084 int top = imm & 1;
7085 int shl = imm >> 1;
7087 if (top) {
7088 if (shl == halfbits) {
7089 TCGv_vec t = tcg_temp_new_vec_matching(d);
7090 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
7091 tcg_gen_and_vec(vece, d, n, t);
7092 tcg_temp_free_vec(t);
7093 } else {
7094 tcg_gen_shri_vec(vece, d, n, halfbits);
7095 tcg_gen_shli_vec(vece, d, d, shl);
7097 } else {
7098 if (shl == 0) {
7099 TCGv_vec t = tcg_temp_new_vec_matching(d);
7100 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7101 tcg_gen_and_vec(vece, d, n, t);
7102 tcg_temp_free_vec(t);
7103 } else {
7104 tcg_gen_shli_vec(vece, d, n, halfbits);
7105 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
7110 static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
7111 bool sel, bool uns)
7113 static const TCGOpcode sshll_list[] = {
7114 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
7116 static const TCGOpcode ushll_list[] = {
7117 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
7119 static const GVecGen2i ops[2][3] = {
7120 { { .fniv = gen_sshll_vec,
7121 .opt_opc = sshll_list,
7122 .fno = gen_helper_sve2_sshll_h,
7123 .vece = MO_16 },
7124 { .fniv = gen_sshll_vec,
7125 .opt_opc = sshll_list,
7126 .fno = gen_helper_sve2_sshll_s,
7127 .vece = MO_32 },
7128 { .fniv = gen_sshll_vec,
7129 .opt_opc = sshll_list,
7130 .fno = gen_helper_sve2_sshll_d,
7131 .vece = MO_64 } },
7132 { { .fni8 = gen_ushll16_i64,
7133 .fniv = gen_ushll_vec,
7134 .opt_opc = ushll_list,
7135 .fno = gen_helper_sve2_ushll_h,
7136 .vece = MO_16 },
7137 { .fni8 = gen_ushll32_i64,
7138 .fniv = gen_ushll_vec,
7139 .opt_opc = ushll_list,
7140 .fno = gen_helper_sve2_ushll_s,
7141 .vece = MO_32 },
7142 { .fni8 = gen_ushll64_i64,
7143 .fniv = gen_ushll_vec,
7144 .opt_opc = ushll_list,
7145 .fno = gen_helper_sve2_ushll_d,
7146 .vece = MO_64 } },
7149 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
7150 return false;
7152 if (sve_access_check(s)) {
7153 unsigned vsz = vec_full_reg_size(s);
7154 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7155 vec_full_reg_offset(s, a->rn),
7156 vsz, vsz, (a->imm << 1) | sel,
7157 &ops[uns][a->esz]);
7159 return true;
7162 static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
7164 return do_sve2_shll_tb(s, a, false, false);
7167 static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
7169 return do_sve2_shll_tb(s, a, true, false);
7172 static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
7174 return do_sve2_shll_tb(s, a, false, true);
7177 static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
7179 return do_sve2_shll_tb(s, a, true, true);
7182 static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
7184 static gen_helper_gvec_3 * const fns[4] = {
7185 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
7186 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
7188 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7189 return false;
7191 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7194 static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
7196 static gen_helper_gvec_3 * const fns[4] = {
7197 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
7198 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
7200 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7201 return false;
7203 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7206 static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
7208 static gen_helper_gvec_3 * const fns[4] = {
7209 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
7210 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
7212 if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7213 return false;
7215 return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7218 static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
7220 static gen_helper_gvec_3 * const fns[2][4] = {
7221 { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
7222 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
7223 { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
7224 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
7226 return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
7229 static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
7231 return do_cadd(s, a, false, false);
7234 static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
7236 return do_cadd(s, a, false, true);
7239 static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
7241 return do_cadd(s, a, true, false);
7244 static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
7246 return do_cadd(s, a, true, true);
7249 static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
7250 gen_helper_gvec_4 *fn, int data)
7252 if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
7253 return false;
7255 if (sve_access_check(s)) {
7256 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
7258 return true;
7261 static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
7263 static gen_helper_gvec_4 * const fns[2][4] = {
7264 { NULL, gen_helper_sve2_sabal_h,
7265 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
7266 { NULL, gen_helper_sve2_uabal_h,
7267 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
7269 return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
7272 static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
7274 return do_abal(s, a, false, false);
7277 static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
7279 return do_abal(s, a, false, true);
7282 static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
7284 return do_abal(s, a, true, false);
7287 static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
7289 return do_abal(s, a, true, true);
7292 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
7294 static gen_helper_gvec_4 * const fns[2] = {
7295 gen_helper_sve2_adcl_s,
7296 gen_helper_sve2_adcl_d,
7299 * Note that in this case the ESZ field encodes both size and sign.
7300 * Split out 'subtract' into bit 1 of the data field for the helper.
7302 return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
7305 static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
7307 return do_adcl(s, a, false);
7310 static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
7312 return do_adcl(s, a, true);
7315 static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
7317 if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
7318 return false;
7320 if (sve_access_check(s)) {
7321 unsigned vsz = vec_full_reg_size(s);
7322 unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
7323 unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
7324 fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
7326 return true;
7329 static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
7331 return do_sve2_fn2i(s, a, gen_gvec_ssra);
7334 static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
7336 return do_sve2_fn2i(s, a, gen_gvec_usra);
7339 static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
7341 return do_sve2_fn2i(s, a, gen_gvec_srsra);
7344 static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
7346 return do_sve2_fn2i(s, a, gen_gvec_ursra);
7349 static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
7351 return do_sve2_fn2i(s, a, gen_gvec_sri);
7354 static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
7356 return do_sve2_fn2i(s, a, gen_gvec_sli);
7359 static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
7361 if (!dc_isar_feature(aa64_sve2, s)) {
7362 return false;
7364 if (sve_access_check(s)) {
7365 gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
7367 return true;
7370 static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
7372 return do_sve2_fn_zzz(s, a, gen_gvec_saba);
7375 static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
7377 return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
7380 static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
7381 const GVecGen2 ops[3])
7383 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
7384 !dc_isar_feature(aa64_sve2, s)) {
7385 return false;
7387 if (sve_access_check(s)) {
7388 unsigned vsz = vec_full_reg_size(s);
7389 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
7390 vec_full_reg_offset(s, a->rn),
7391 vsz, vsz, &ops[a->esz]);
7393 return true;
7396 static const TCGOpcode sqxtn_list[] = {
7397 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
7400 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7402 TCGv_vec t = tcg_temp_new_vec_matching(d);
7403 int halfbits = 4 << vece;
7404 int64_t mask = (1ull << halfbits) - 1;
7405 int64_t min = -1ull << (halfbits - 1);
7406 int64_t max = -min - 1;
7408 tcg_gen_dupi_vec(vece, t, min);
7409 tcg_gen_smax_vec(vece, d, n, t);
7410 tcg_gen_dupi_vec(vece, t, max);
7411 tcg_gen_smin_vec(vece, d, d, t);
7412 tcg_gen_dupi_vec(vece, t, mask);
7413 tcg_gen_and_vec(vece, d, d, t);
7414 tcg_temp_free_vec(t);
7417 static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
7419 static const GVecGen2 ops[3] = {
7420 { .fniv = gen_sqxtnb_vec,
7421 .opt_opc = sqxtn_list,
7422 .fno = gen_helper_sve2_sqxtnb_h,
7423 .vece = MO_16 },
7424 { .fniv = gen_sqxtnb_vec,
7425 .opt_opc = sqxtn_list,
7426 .fno = gen_helper_sve2_sqxtnb_s,
7427 .vece = MO_32 },
7428 { .fniv = gen_sqxtnb_vec,
7429 .opt_opc = sqxtn_list,
7430 .fno = gen_helper_sve2_sqxtnb_d,
7431 .vece = MO_64 },
7433 return do_sve2_narrow_extract(s, a, ops);
7436 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7438 TCGv_vec t = tcg_temp_new_vec_matching(d);
7439 int halfbits = 4 << vece;
7440 int64_t mask = (1ull << halfbits) - 1;
7441 int64_t min = -1ull << (halfbits - 1);
7442 int64_t max = -min - 1;
7444 tcg_gen_dupi_vec(vece, t, min);
7445 tcg_gen_smax_vec(vece, n, n, t);
7446 tcg_gen_dupi_vec(vece, t, max);
7447 tcg_gen_smin_vec(vece, n, n, t);
7448 tcg_gen_shli_vec(vece, n, n, halfbits);
7449 tcg_gen_dupi_vec(vece, t, mask);
7450 tcg_gen_bitsel_vec(vece, d, t, d, n);
7451 tcg_temp_free_vec(t);
7454 static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
7456 static const GVecGen2 ops[3] = {
7457 { .fniv = gen_sqxtnt_vec,
7458 .opt_opc = sqxtn_list,
7459 .load_dest = true,
7460 .fno = gen_helper_sve2_sqxtnt_h,
7461 .vece = MO_16 },
7462 { .fniv = gen_sqxtnt_vec,
7463 .opt_opc = sqxtn_list,
7464 .load_dest = true,
7465 .fno = gen_helper_sve2_sqxtnt_s,
7466 .vece = MO_32 },
7467 { .fniv = gen_sqxtnt_vec,
7468 .opt_opc = sqxtn_list,
7469 .load_dest = true,
7470 .fno = gen_helper_sve2_sqxtnt_d,
7471 .vece = MO_64 },
7473 return do_sve2_narrow_extract(s, a, ops);
7476 static const TCGOpcode uqxtn_list[] = {
7477 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
7480 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7482 TCGv_vec t = tcg_temp_new_vec_matching(d);
7483 int halfbits = 4 << vece;
7484 int64_t max = (1ull << halfbits) - 1;
7486 tcg_gen_dupi_vec(vece, t, max);
7487 tcg_gen_umin_vec(vece, d, n, t);
7488 tcg_temp_free_vec(t);
7491 static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
7493 static const GVecGen2 ops[3] = {
7494 { .fniv = gen_uqxtnb_vec,
7495 .opt_opc = uqxtn_list,
7496 .fno = gen_helper_sve2_uqxtnb_h,
7497 .vece = MO_16 },
7498 { .fniv = gen_uqxtnb_vec,
7499 .opt_opc = uqxtn_list,
7500 .fno = gen_helper_sve2_uqxtnb_s,
7501 .vece = MO_32 },
7502 { .fniv = gen_uqxtnb_vec,
7503 .opt_opc = uqxtn_list,
7504 .fno = gen_helper_sve2_uqxtnb_d,
7505 .vece = MO_64 },
7507 return do_sve2_narrow_extract(s, a, ops);
7510 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7512 TCGv_vec t = tcg_temp_new_vec_matching(d);
7513 int halfbits = 4 << vece;
7514 int64_t max = (1ull << halfbits) - 1;
7516 tcg_gen_dupi_vec(vece, t, max);
7517 tcg_gen_umin_vec(vece, n, n, t);
7518 tcg_gen_shli_vec(vece, n, n, halfbits);
7519 tcg_gen_bitsel_vec(vece, d, t, d, n);
7520 tcg_temp_free_vec(t);
7523 static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7525 static const GVecGen2 ops[3] = {
7526 { .fniv = gen_uqxtnt_vec,
7527 .opt_opc = uqxtn_list,
7528 .load_dest = true,
7529 .fno = gen_helper_sve2_uqxtnt_h,
7530 .vece = MO_16 },
7531 { .fniv = gen_uqxtnt_vec,
7532 .opt_opc = uqxtn_list,
7533 .load_dest = true,
7534 .fno = gen_helper_sve2_uqxtnt_s,
7535 .vece = MO_32 },
7536 { .fniv = gen_uqxtnt_vec,
7537 .opt_opc = uqxtn_list,
7538 .load_dest = true,
7539 .fno = gen_helper_sve2_uqxtnt_d,
7540 .vece = MO_64 },
7542 return do_sve2_narrow_extract(s, a, ops);
7545 static const TCGOpcode sqxtun_list[] = {
7546 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
7549 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7551 TCGv_vec t = tcg_temp_new_vec_matching(d);
7552 int halfbits = 4 << vece;
7553 int64_t max = (1ull << halfbits) - 1;
7555 tcg_gen_dupi_vec(vece, t, 0);
7556 tcg_gen_smax_vec(vece, d, n, t);
7557 tcg_gen_dupi_vec(vece, t, max);
7558 tcg_gen_umin_vec(vece, d, d, t);
7559 tcg_temp_free_vec(t);
7562 static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7564 static const GVecGen2 ops[3] = {
7565 { .fniv = gen_sqxtunb_vec,
7566 .opt_opc = sqxtun_list,
7567 .fno = gen_helper_sve2_sqxtunb_h,
7568 .vece = MO_16 },
7569 { .fniv = gen_sqxtunb_vec,
7570 .opt_opc = sqxtun_list,
7571 .fno = gen_helper_sve2_sqxtunb_s,
7572 .vece = MO_32 },
7573 { .fniv = gen_sqxtunb_vec,
7574 .opt_opc = sqxtun_list,
7575 .fno = gen_helper_sve2_sqxtunb_d,
7576 .vece = MO_64 },
7578 return do_sve2_narrow_extract(s, a, ops);
7581 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
7583 TCGv_vec t = tcg_temp_new_vec_matching(d);
7584 int halfbits = 4 << vece;
7585 int64_t max = (1ull << halfbits) - 1;
7587 tcg_gen_dupi_vec(vece, t, 0);
7588 tcg_gen_smax_vec(vece, n, n, t);
7589 tcg_gen_dupi_vec(vece, t, max);
7590 tcg_gen_umin_vec(vece, n, n, t);
7591 tcg_gen_shli_vec(vece, n, n, halfbits);
7592 tcg_gen_bitsel_vec(vece, d, t, d, n);
7593 tcg_temp_free_vec(t);
7596 static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7598 static const GVecGen2 ops[3] = {
7599 { .fniv = gen_sqxtunt_vec,
7600 .opt_opc = sqxtun_list,
7601 .load_dest = true,
7602 .fno = gen_helper_sve2_sqxtunt_h,
7603 .vece = MO_16 },
7604 { .fniv = gen_sqxtunt_vec,
7605 .opt_opc = sqxtun_list,
7606 .load_dest = true,
7607 .fno = gen_helper_sve2_sqxtunt_s,
7608 .vece = MO_32 },
7609 { .fniv = gen_sqxtunt_vec,
7610 .opt_opc = sqxtun_list,
7611 .load_dest = true,
7612 .fno = gen_helper_sve2_sqxtunt_d,
7613 .vece = MO_64 },
7615 return do_sve2_narrow_extract(s, a, ops);
7618 static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7619 const GVecGen2i ops[3])
7621 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7622 return false;
7624 assert(a->imm > 0 && a->imm <= (8 << a->esz));
7625 if (sve_access_check(s)) {
7626 unsigned vsz = vec_full_reg_size(s);
7627 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7628 vec_full_reg_offset(s, a->rn),
7629 vsz, vsz, a->imm, &ops[a->esz]);
7631 return true;
7634 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7636 int halfbits = 4 << vece;
7637 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7639 tcg_gen_shri_i64(d, n, shr);
7640 tcg_gen_andi_i64(d, d, mask);
7643 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7645 gen_shrnb_i64(MO_16, d, n, shr);
7648 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7650 gen_shrnb_i64(MO_32, d, n, shr);
7653 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7655 gen_shrnb_i64(MO_64, d, n, shr);
7658 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7660 TCGv_vec t = tcg_temp_new_vec_matching(d);
7661 int halfbits = 4 << vece;
7662 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7664 tcg_gen_shri_vec(vece, n, n, shr);
7665 tcg_gen_dupi_vec(vece, t, mask);
7666 tcg_gen_and_vec(vece, d, n, t);
7667 tcg_temp_free_vec(t);
7670 static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7672 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7673 static const GVecGen2i ops[3] = {
7674 { .fni8 = gen_shrnb16_i64,
7675 .fniv = gen_shrnb_vec,
7676 .opt_opc = vec_list,
7677 .fno = gen_helper_sve2_shrnb_h,
7678 .vece = MO_16 },
7679 { .fni8 = gen_shrnb32_i64,
7680 .fniv = gen_shrnb_vec,
7681 .opt_opc = vec_list,
7682 .fno = gen_helper_sve2_shrnb_s,
7683 .vece = MO_32 },
7684 { .fni8 = gen_shrnb64_i64,
7685 .fniv = gen_shrnb_vec,
7686 .opt_opc = vec_list,
7687 .fno = gen_helper_sve2_shrnb_d,
7688 .vece = MO_64 },
7690 return do_sve2_shr_narrow(s, a, ops);
7693 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
7695 int halfbits = 4 << vece;
7696 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
7698 tcg_gen_shli_i64(n, n, halfbits - shr);
7699 tcg_gen_andi_i64(n, n, ~mask);
7700 tcg_gen_andi_i64(d, d, mask);
7701 tcg_gen_or_i64(d, d, n);
7704 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7706 gen_shrnt_i64(MO_16, d, n, shr);
7709 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7711 gen_shrnt_i64(MO_32, d, n, shr);
7714 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
7716 tcg_gen_shri_i64(n, n, shr);
7717 tcg_gen_deposit_i64(d, d, n, 32, 32);
7720 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
7722 TCGv_vec t = tcg_temp_new_vec_matching(d);
7723 int halfbits = 4 << vece;
7724 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
7726 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
7727 tcg_gen_dupi_vec(vece, t, mask);
7728 tcg_gen_bitsel_vec(vece, d, t, d, n);
7729 tcg_temp_free_vec(t);
7732 static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7734 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7735 static const GVecGen2i ops[3] = {
7736 { .fni8 = gen_shrnt16_i64,
7737 .fniv = gen_shrnt_vec,
7738 .opt_opc = vec_list,
7739 .load_dest = true,
7740 .fno = gen_helper_sve2_shrnt_h,
7741 .vece = MO_16 },
7742 { .fni8 = gen_shrnt32_i64,
7743 .fniv = gen_shrnt_vec,
7744 .opt_opc = vec_list,
7745 .load_dest = true,
7746 .fno = gen_helper_sve2_shrnt_s,
7747 .vece = MO_32 },
7748 { .fni8 = gen_shrnt64_i64,
7749 .fniv = gen_shrnt_vec,
7750 .opt_opc = vec_list,
7751 .load_dest = true,
7752 .fno = gen_helper_sve2_shrnt_d,
7753 .vece = MO_64 },
7755 return do_sve2_shr_narrow(s, a, ops);
7758 static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7760 static const GVecGen2i ops[3] = {
7761 { .fno = gen_helper_sve2_rshrnb_h },
7762 { .fno = gen_helper_sve2_rshrnb_s },
7763 { .fno = gen_helper_sve2_rshrnb_d },
7765 return do_sve2_shr_narrow(s, a, ops);
7768 static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7770 static const GVecGen2i ops[3] = {
7771 { .fno = gen_helper_sve2_rshrnt_h },
7772 { .fno = gen_helper_sve2_rshrnt_s },
7773 { .fno = gen_helper_sve2_rshrnt_d },
7775 return do_sve2_shr_narrow(s, a, ops);
7778 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
7779 TCGv_vec n, int64_t shr)
7781 TCGv_vec t = tcg_temp_new_vec_matching(d);
7782 int halfbits = 4 << vece;
7784 tcg_gen_sari_vec(vece, n, n, shr);
7785 tcg_gen_dupi_vec(vece, t, 0);
7786 tcg_gen_smax_vec(vece, n, n, t);
7787 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7788 tcg_gen_umin_vec(vece, d, n, t);
7789 tcg_temp_free_vec(t);
7792 static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7794 static const TCGOpcode vec_list[] = {
7795 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7797 static const GVecGen2i ops[3] = {
7798 { .fniv = gen_sqshrunb_vec,
7799 .opt_opc = vec_list,
7800 .fno = gen_helper_sve2_sqshrunb_h,
7801 .vece = MO_16 },
7802 { .fniv = gen_sqshrunb_vec,
7803 .opt_opc = vec_list,
7804 .fno = gen_helper_sve2_sqshrunb_s,
7805 .vece = MO_32 },
7806 { .fniv = gen_sqshrunb_vec,
7807 .opt_opc = vec_list,
7808 .fno = gen_helper_sve2_sqshrunb_d,
7809 .vece = MO_64 },
7811 return do_sve2_shr_narrow(s, a, ops);
7814 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
7815 TCGv_vec n, int64_t shr)
7817 TCGv_vec t = tcg_temp_new_vec_matching(d);
7818 int halfbits = 4 << vece;
7820 tcg_gen_sari_vec(vece, n, n, shr);
7821 tcg_gen_dupi_vec(vece, t, 0);
7822 tcg_gen_smax_vec(vece, n, n, t);
7823 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7824 tcg_gen_umin_vec(vece, n, n, t);
7825 tcg_gen_shli_vec(vece, n, n, halfbits);
7826 tcg_gen_bitsel_vec(vece, d, t, d, n);
7827 tcg_temp_free_vec(t);
7830 static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7832 static const TCGOpcode vec_list[] = {
7833 INDEX_op_shli_vec, INDEX_op_sari_vec,
7834 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7836 static const GVecGen2i ops[3] = {
7837 { .fniv = gen_sqshrunt_vec,
7838 .opt_opc = vec_list,
7839 .load_dest = true,
7840 .fno = gen_helper_sve2_sqshrunt_h,
7841 .vece = MO_16 },
7842 { .fniv = gen_sqshrunt_vec,
7843 .opt_opc = vec_list,
7844 .load_dest = true,
7845 .fno = gen_helper_sve2_sqshrunt_s,
7846 .vece = MO_32 },
7847 { .fniv = gen_sqshrunt_vec,
7848 .opt_opc = vec_list,
7849 .load_dest = true,
7850 .fno = gen_helper_sve2_sqshrunt_d,
7851 .vece = MO_64 },
7853 return do_sve2_shr_narrow(s, a, ops);
7856 static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7858 static const GVecGen2i ops[3] = {
7859 { .fno = gen_helper_sve2_sqrshrunb_h },
7860 { .fno = gen_helper_sve2_sqrshrunb_s },
7861 { .fno = gen_helper_sve2_sqrshrunb_d },
7863 return do_sve2_shr_narrow(s, a, ops);
7866 static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7868 static const GVecGen2i ops[3] = {
7869 { .fno = gen_helper_sve2_sqrshrunt_h },
7870 { .fno = gen_helper_sve2_sqrshrunt_s },
7871 { .fno = gen_helper_sve2_sqrshrunt_d },
7873 return do_sve2_shr_narrow(s, a, ops);
7876 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
7877 TCGv_vec n, int64_t shr)
7879 TCGv_vec t = tcg_temp_new_vec_matching(d);
7880 int halfbits = 4 << vece;
7881 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7882 int64_t min = -max - 1;
7884 tcg_gen_sari_vec(vece, n, n, shr);
7885 tcg_gen_dupi_vec(vece, t, min);
7886 tcg_gen_smax_vec(vece, n, n, t);
7887 tcg_gen_dupi_vec(vece, t, max);
7888 tcg_gen_smin_vec(vece, n, n, t);
7889 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7890 tcg_gen_and_vec(vece, d, n, t);
7891 tcg_temp_free_vec(t);
7894 static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7896 static const TCGOpcode vec_list[] = {
7897 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7899 static const GVecGen2i ops[3] = {
7900 { .fniv = gen_sqshrnb_vec,
7901 .opt_opc = vec_list,
7902 .fno = gen_helper_sve2_sqshrnb_h,
7903 .vece = MO_16 },
7904 { .fniv = gen_sqshrnb_vec,
7905 .opt_opc = vec_list,
7906 .fno = gen_helper_sve2_sqshrnb_s,
7907 .vece = MO_32 },
7908 { .fniv = gen_sqshrnb_vec,
7909 .opt_opc = vec_list,
7910 .fno = gen_helper_sve2_sqshrnb_d,
7911 .vece = MO_64 },
7913 return do_sve2_shr_narrow(s, a, ops);
7916 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
7917 TCGv_vec n, int64_t shr)
7919 TCGv_vec t = tcg_temp_new_vec_matching(d);
7920 int halfbits = 4 << vece;
7921 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
7922 int64_t min = -max - 1;
7924 tcg_gen_sari_vec(vece, n, n, shr);
7925 tcg_gen_dupi_vec(vece, t, min);
7926 tcg_gen_smax_vec(vece, n, n, t);
7927 tcg_gen_dupi_vec(vece, t, max);
7928 tcg_gen_smin_vec(vece, n, n, t);
7929 tcg_gen_shli_vec(vece, n, n, halfbits);
7930 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7931 tcg_gen_bitsel_vec(vece, d, t, d, n);
7932 tcg_temp_free_vec(t);
7935 static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7937 static const TCGOpcode vec_list[] = {
7938 INDEX_op_shli_vec, INDEX_op_sari_vec,
7939 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7941 static const GVecGen2i ops[3] = {
7942 { .fniv = gen_sqshrnt_vec,
7943 .opt_opc = vec_list,
7944 .load_dest = true,
7945 .fno = gen_helper_sve2_sqshrnt_h,
7946 .vece = MO_16 },
7947 { .fniv = gen_sqshrnt_vec,
7948 .opt_opc = vec_list,
7949 .load_dest = true,
7950 .fno = gen_helper_sve2_sqshrnt_s,
7951 .vece = MO_32 },
7952 { .fniv = gen_sqshrnt_vec,
7953 .opt_opc = vec_list,
7954 .load_dest = true,
7955 .fno = gen_helper_sve2_sqshrnt_d,
7956 .vece = MO_64 },
7958 return do_sve2_shr_narrow(s, a, ops);
7961 static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7963 static const GVecGen2i ops[3] = {
7964 { .fno = gen_helper_sve2_sqrshrnb_h },
7965 { .fno = gen_helper_sve2_sqrshrnb_s },
7966 { .fno = gen_helper_sve2_sqrshrnb_d },
7968 return do_sve2_shr_narrow(s, a, ops);
7971 static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7973 static const GVecGen2i ops[3] = {
7974 { .fno = gen_helper_sve2_sqrshrnt_h },
7975 { .fno = gen_helper_sve2_sqrshrnt_s },
7976 { .fno = gen_helper_sve2_sqrshrnt_d },
7978 return do_sve2_shr_narrow(s, a, ops);
7981 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
7982 TCGv_vec n, int64_t shr)
7984 TCGv_vec t = tcg_temp_new_vec_matching(d);
7985 int halfbits = 4 << vece;
7987 tcg_gen_shri_vec(vece, n, n, shr);
7988 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
7989 tcg_gen_umin_vec(vece, d, n, t);
7990 tcg_temp_free_vec(t);
7993 static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7995 static const TCGOpcode vec_list[] = {
7996 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7998 static const GVecGen2i ops[3] = {
7999 { .fniv = gen_uqshrnb_vec,
8000 .opt_opc = vec_list,
8001 .fno = gen_helper_sve2_uqshrnb_h,
8002 .vece = MO_16 },
8003 { .fniv = gen_uqshrnb_vec,
8004 .opt_opc = vec_list,
8005 .fno = gen_helper_sve2_uqshrnb_s,
8006 .vece = MO_32 },
8007 { .fniv = gen_uqshrnb_vec,
8008 .opt_opc = vec_list,
8009 .fno = gen_helper_sve2_uqshrnb_d,
8010 .vece = MO_64 },
8012 return do_sve2_shr_narrow(s, a, ops);
8015 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
8016 TCGv_vec n, int64_t shr)
8018 TCGv_vec t = tcg_temp_new_vec_matching(d);
8019 int halfbits = 4 << vece;
8021 tcg_gen_shri_vec(vece, n, n, shr);
8022 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
8023 tcg_gen_umin_vec(vece, n, n, t);
8024 tcg_gen_shli_vec(vece, n, n, halfbits);
8025 tcg_gen_bitsel_vec(vece, d, t, d, n);
8026 tcg_temp_free_vec(t);
8029 static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
8031 static const TCGOpcode vec_list[] = {
8032 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
8034 static const GVecGen2i ops[3] = {
8035 { .fniv = gen_uqshrnt_vec,
8036 .opt_opc = vec_list,
8037 .load_dest = true,
8038 .fno = gen_helper_sve2_uqshrnt_h,
8039 .vece = MO_16 },
8040 { .fniv = gen_uqshrnt_vec,
8041 .opt_opc = vec_list,
8042 .load_dest = true,
8043 .fno = gen_helper_sve2_uqshrnt_s,
8044 .vece = MO_32 },
8045 { .fniv = gen_uqshrnt_vec,
8046 .opt_opc = vec_list,
8047 .load_dest = true,
8048 .fno = gen_helper_sve2_uqshrnt_d,
8049 .vece = MO_64 },
8051 return do_sve2_shr_narrow(s, a, ops);
8054 static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
8056 static const GVecGen2i ops[3] = {
8057 { .fno = gen_helper_sve2_uqrshrnb_h },
8058 { .fno = gen_helper_sve2_uqrshrnb_s },
8059 { .fno = gen_helper_sve2_uqrshrnb_d },
8061 return do_sve2_shr_narrow(s, a, ops);
8064 static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
8066 static const GVecGen2i ops[3] = {
8067 { .fno = gen_helper_sve2_uqrshrnt_h },
8068 { .fno = gen_helper_sve2_uqrshrnt_s },
8069 { .fno = gen_helper_sve2_uqrshrnt_d },
8071 return do_sve2_shr_narrow(s, a, ops);
8074 #define DO_SVE2_ZZZ_NARROW(NAME, name) \
8075 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
8077 static gen_helper_gvec_3 * const fns[4] = { \
8078 NULL, gen_helper_sve2_##name##_h, \
8079 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
8080 }; \
8081 return do_sve2_zzz_ool(s, a, fns[a->esz]); \
8084 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
8085 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
8086 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
8087 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
8089 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
8090 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
8091 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
8092 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
8094 static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
8095 gen_helper_gvec_flags_4 *fn)
8097 if (!dc_isar_feature(aa64_sve2, s)) {
8098 return false;
8100 return do_ppzz_flags(s, a, fn);
8103 #define DO_SVE2_PPZZ_MATCH(NAME, name) \
8104 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
8106 static gen_helper_gvec_flags_4 * const fns[4] = { \
8107 gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
8108 NULL, NULL \
8109 }; \
8110 return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
8113 DO_SVE2_PPZZ_MATCH(MATCH, match)
8114 DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8116 static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8118 static gen_helper_gvec_4 * const fns[2] = {
8119 gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8121 if (a->esz < 2) {
8122 return false;
8124 return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8127 static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8129 if (a->esz != 0) {
8130 return false;
8132 return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8135 static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
8136 gen_helper_gvec_4_ptr *fn)
8138 if (!dc_isar_feature(aa64_sve2, s)) {
8139 return false;
8141 return do_zpzz_fp(s, a, fn);
8144 #define DO_SVE2_ZPZZ_FP(NAME, name) \
8145 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
8147 static gen_helper_gvec_4_ptr * const fns[4] = { \
8148 NULL, gen_helper_sve2_##name##_zpzz_h, \
8149 gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
8150 }; \
8151 return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
8154 DO_SVE2_ZPZZ_FP(FADDP, faddp)
8155 DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
8156 DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
8157 DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
8158 DO_SVE2_ZPZZ_FP(FMINP, fminp)
8161 * SVE Integer Multiply-Add (unpredicated)
/*
 * FMMLA: floating-point matrix multiply-accumulate.
 * The single-precision form requires FEAT_F32MM and the double-precision
 * form FEAT_F64MM; all other element sizes are unallocated.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* FP status pointer gives the helper access to FPCR rounding/flags. */
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * SQDMLAL[BT]: signed saturating doubling multiply-add long.
 * sel1/sel2 select the bottom (false) or top (true) source elements
 * of the first and second operand respectively; the pair is packed
 * into the helper's data argument.  The B element size is unallocated
 * (NULL table slot).
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

/*
 * SQDMLSL[BT]: signed saturating doubling multiply-subtract long.
 * Same element-selection scheme as do_sqdmlal_zzzw above.
 */
static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}
/* SQDMLALB: both sources taken from bottom elements.  */
static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

/* SQDMLALT: both sources taken from top elements.  */
static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

/* SQDMLALBT: first source from bottom, second from top.  */
static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

/* SQDMLSLB: both sources taken from bottom elements.  */
static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

/* SQDMLSLT: both sources taken from top elements.  */
static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

/* SQDMLSLBT: first source from bottom, second from top.  */
static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
/*
 * SQRDMLAH: signed saturating rounding doubling multiply-add high.
 * All four element sizes are allocated; no extra data is needed.
 */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

/*
 * SQRDMLSH: signed saturating rounding doubling multiply-subtract high.
 */
static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
/*
 * SMLAL[BT]: signed multiply-add long.  'sel' selects bottom (false)
 * or top (true) source elements; B element size is unallocated.
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

/* SMLALB: sources from bottom elements.  */
static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

/* SMLALT: sources from top elements.  */
static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}
/*
 * UMLAL[BT]: unsigned multiply-add long.  'sel' selects bottom (false)
 * or top (true) source elements; B element size is unallocated.
 */
static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

/* UMLALB: sources from bottom elements.  */
static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

/* UMLALT: sources from top elements.  */
static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}
/*
 * SMLSL[BT]: signed multiply-subtract long.  'sel' selects bottom (false)
 * or top (true) source elements; B element size is unallocated.
 */
static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

/* SMLSLB: sources from bottom elements.  */
static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

/* SMLSLT: sources from top elements.  */
static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}
/*
 * UMLSL[BT]: unsigned multiply-subtract long.  'sel' selects bottom (false)
 * or top (true) source elements; B element size is unallocated.
 */
static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

/* UMLSLB: sources from bottom elements.  */
static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

/* UMLSLT: sources from top elements.  */
static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
/*
 * CMLA (vectors): complex integer multiply-add.  The rotation field
 * is forwarded to the helper as the data argument; all four element
 * sizes are allocated.
 */
static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
        gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}
8358 static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8360 if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8361 return false;
8363 if (sve_access_check(s)) {
8364 gen_helper_gvec_4 *fn = (a->esz == MO_32
8365 ? gen_helper_sve2_cdot_zzzz_s
8366 : gen_helper_sve2_cdot_zzzz_d);
8367 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8369 return true;
/*
 * SQRDCMLAH (vectors): saturating rounding doubling complex integer
 * multiply-add high.  The rotation field is forwarded as data; all
 * four element sizes are allocated.
 */
static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
        gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
    }
    return true;
}
/*
 * USDOT: unsigned-by-signed integer dot product (FEAT_I8MM).
 * Only the S element size encoding is allocated; the shared
 * AdvSIMD/SVE byte-dot helper is used.
 */
static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
{
    if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           vsz, vsz, 0, gen_helper_gvec_usdot_b);
    }
    return true;
}
/*
 * AESMC/AESIMC: AES mix-columns (and inverse, selected by a->decrypt),
 * operating in place on Zd.  Requires FEAT_SVE_AES.
 */
static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
    }
    return true;
}

/*
 * Shared expansion for AESE/AESD; 'decrypt' selects the inverse
 * transformation inside the shared crypto helper.
 */
static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
                         a->rd, a->rn, a->rm, decrypt);
    }
    return true;
}

/* AESE: AES single-round encryption.  */
static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, false);
}

/* AESD: AES single-round decryption.  */
static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, true);
}
/*
 * Shared expansion for the SM4 crypto instructions (FEAT_SVE_SM4),
 * reusing the AdvSIMD crypto helpers.
 */
static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2_sm4, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

/* SM4E: SM4 encryption round.  */
static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4e);
}

/* SM4EKEY: SM4 key-schedule update.  */
static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4ekey);
}
/*
 * RAX1: rotate-and-xor, part of the SHA3 support (FEAT_SVE_SHA3).
 * Always operates on 64-bit elements.
 */
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}
/* FCVTNT (single to half): narrow, writing the top half of each pair.  */
static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
}

/* BFCVTNT: narrow single to bfloat16, top halves (FEAT_BF16).  */
static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
}

/* FCVTNT (double to single): narrow, writing the top half of each pair.  */
static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
}

/* FCVTLT (half to single): widen from the top half of each pair.  */
static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
}

/* FCVTLT (single to double): widen from the top half of each pair.  */
static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
}

/*
 * FCVTX: double to single with round-to-odd; reuses the plain
 * D->S convert helper with the rounding mode forced to odd.
 */
static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
}

/* FCVTXNT: narrowing variant of FCVTX, writing top halves.  */
static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
}
/*
 * FLOGB: floating-point base-2 exponent extraction.
 * The B element size is unallocated (NULL table slot); half precision
 * uses the FP16-specific status flags.
 */
static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        NULL,               gen_helper_flogb_h,
        gen_helper_flogb_s, gen_helper_flogb_d
    };

    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
/*
 * FMLAL/FMLSL (vectors): half-to-single widening FP multiply-add/sub.
 * 'sub' selects subtraction, 'sel' selects the top (odd) source
 * elements; both are packed into the helper's data argument.  The
 * helper takes cpu_env rather than an fpstatus pointer.
 */
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz, (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzzw_s);
    }
    return true;
}

/* FMLALB: add, bottom elements.  */
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, false);
}

/* FMLALT: add, top elements.  */
static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, false, true);
}

/* FMLSLB: subtract, bottom elements.  */
static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, false);
}

/* FMLSLT: subtract, top elements.  */
static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_FMLAL_zzzw(s, a, true, true);
}
/*
 * FMLAL/FMLSL (indexed): as do_FMLAL_zzzw but with the second operand
 * element chosen by a->index, packed above the sel/sub bits in data.
 */
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           cpu_env, vsz, vsz,
                           (a->index << 2) | (sel << 1) | sub,
                           gen_helper_sve2_fmlal_zzxw_s);
    }
    return true;
}

/* FMLALB (indexed): add, bottom elements.  */
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

/* FMLALT (indexed): add, top elements.  */
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

/* FMLSLB (indexed): subtract, bottom elements.  */
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

/* FMLSLT (indexed): subtract, top elements.  */
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}
8624 static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
8625 gen_helper_gvec_4 *fn, int data)
8627 if (!dc_isar_feature(aa64_sve_i8mm, s)) {
8628 return false;
8630 if (sve_access_check(s)) {
8631 gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
8633 return true;
/* SMMLA: signed 8-bit integer matrix multiply-accumulate.  */
static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
}

/* USMMLA: unsigned-by-signed 8-bit integer matrix multiply-accumulate.  */
static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
}

/* UMMLA: unsigned 8-bit integer matrix multiply-accumulate.  */
static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
}
/* BFDOT (vectors): bfloat16 dot product (FEAT_BF16).  */
static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
/*
 * BFDOT (indexed): bfloat16 dot product with the second operand pair
 * selected by a->index, passed to the helper as data.
 */
static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
                          a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}
/* BFMMLA: bfloat16 matrix multiply-accumulate (FEAT_BF16).  */
static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
/*
 * BFMLAL[BT] (vectors): bfloat16-to-single widening multiply-add.
 * 'sel' selects the top (odd) source elements and is passed to the
 * helper as data.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, sel,
                           gen_helper_gvec_bfmlal);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* BFMLALB: bottom elements.  */
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

/* BFMLALT: top elements.  */
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
/*
 * BFMLAL[BT] (indexed): as do_BFMLAL_zzzw but with the second operand
 * element chosen by a->index, packed above the sel bit in data.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sel,
                           gen_helper_gvec_bfmlal_idx);
        tcg_temp_free_ptr(status);
    }
    return true;
}

/* BFMLALB (indexed): bottom elements.  */
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

/* BFMLALT (indexed): top elements.  */
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}