target/arm: Remove assert in trans_FCMLA_zzxz
[qemu/rayw.git] / target/arm/translate-sve.c
blob 436d09b928a03960c69fb23f217bbb357af73690
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
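/*
 * Worked example (illustrative, not from the original source): for a .s
 * element (esz = 2), the top set bit of the tsz field puts x >> 3 in
 * [4, 7], so tszimm_esz returns 31 - clz32(x >> 3) = 2.  With x itself
 * in [32, 63], tszimm_shr gives (16 << 2) - x = 64 - x, i.e. right-shift
 * amounts 1..32, and tszimm_shl gives x - 32, i.e. 0..31.  A zero tsz
 * field makes clz32 return 32 and tszimm_esz return -1, the unallocated
 * marker that callers check as a->esz < 0.
 */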
/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
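/*
 * E.g. (illustrative): x = 0x1ff has the SH bit set, so the low byte 0xff
 * is shifted left by 8, giving 0xff00 unsigned or -256 after the signed
 * cast; x = 0x0ff leaves it unshifted, 0xff unsigned or -1 signed.
 */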
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
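/*
 * Illustrative mapping: msz = MO_8/16/32/64 yields dtype = 0/5/10/15,
 * the diagonal of the dtype space where the memory element size equals
 * the register element size with no sign extension (LD1B, LD1H, LD1W,
 * LD1D respectively).
 */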
/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
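/*
 * E.g. a 2-byte predicate (VL = 128 bits) rounds up to 8 bytes, while an
 * 18-byte predicate (VL = 1152 bits) rounds up to
 * QEMU_ALIGN_UP(18, 16) = 32.
 */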
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}
/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}
/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus float_status. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}
/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}
/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
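/*
 * Read as: for esz = MO_8 every predicate bit is significant; for MO_16
 * every 2nd bit, for MO_32 every 4th, and for MO_64 every 8th, matching
 * one predicate bit per byte of the corresponding vector element.
 */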
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}
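/*
 * E.g. for sh = 3: mask = dup_const(MO_8, 0x1f) = 0x1f1f...1f, so the
 * cross-byte bits of the 64-bit right shift and left shift are masked
 * off before being recombined: a byte-wise rotate right by 3 of n ^ m
 * performed entirely in one 64-bit register.
 */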
static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}
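/*
 * I.e. d = ~((n & k) | (m & ~k)): the complement of a bit select with
 * k choosing from n (bit set) or m (bit clear), matching the vector
 * form below that applies not_vec after bitsel_vec.
 */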
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {             \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,         \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,         \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                       \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        gen_helper_##name##_b, gen_helper_##name##_h,                   \
        gen_helper_##name##_s, gen_helper_##name##_d,                   \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {              \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,           \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,           \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {             \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
        gen_helper_sve_##name##_zpzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,            \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {              \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,   \
        gen_helper_sve_##name##_zzw_s, NULL                             \
    };                                                                  \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                    \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
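/*
 * E.g. with fullsz = 32 bytes (a 256-bit vector) and esz = MO_64 there
 * are 4 elements: POW2 and VL4 both give 4, VL5..VL8 give 0 (the bound
 * exceeds the element count), and MUL3 gives 3.
 */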
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
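/*
 * E.g. an unsigned add of 0x20 to 0xfffffff0: the 64-bit sum is
 * 0x1_0000_0010, which exceeds UINT32_MAX, so smin clamps the result
 * to 0xffffffff.  No separate overflow comparison is needed because
 * val is known non-negative.
 */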
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
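/*
 * The signed cases above use the classic xor trick: for addition,
 * overflow occurred iff the operands had the same sign and the result's
 * sign differs, i.e. exactly when (result ^ val) & ~(old_reg ^ val) has
 * its sign bit set; the movcond on t0 < 0 then substitutes the
 * saturated bound for the wrapped result.
 */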
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
1999 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
2001 if (sve_access_check(s)) {
2002 unsigned fullsz = vec_full_reg_size(s);
2003 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2004 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2006 return true;
2009 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
2011 if (sve_access_check(s)) {
2012 unsigned fullsz = vec_full_reg_size(s);
2013 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2014 int inc = numelem * a->imm * (a->d ? -1 : 1);
2015 TCGv_i64 reg = cpu_reg(s, a->rd);
2017 tcg_gen_addi_i64(reg, reg, inc);
2019 return true;
2022 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
2023 {
2024 if (!sve_access_check(s)) {
2025 return true;
2026 }
2027
2028 unsigned fullsz = vec_full_reg_size(s);
2029 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2030 int inc = numelem * a->imm;
2031 TCGv_i64 reg = cpu_reg(s, a->rd);
2032
2033 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
2034 if (inc == 0) {
2035 if (a->u) {
2036 tcg_gen_ext32u_i64(reg, reg);
2037 } else {
2038 tcg_gen_ext32s_i64(reg, reg);
2039 }
2040 } else {
2041 do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
2042 }
2043 return true;
2044 }
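/*
 * With the 32-bit quantity widened into a 64-bit register, the saturating
 * increment needs no overflow tricks at all.  A sketch of the unsigned
 * case (illustrative only):
 *
 *   static uint64_t uqadd32(uint64_t reg, int64_t inc)
 *   {
 *       int64_t t = (int64_t)(uint32_t)reg + inc;     // exact in 64 bits
 *       if (t < 0) {
 *           t = 0;
 *       } else if (t > (int64_t)UINT32_MAX) {
 *           t = UINT32_MAX;
 *       }
 *       return (uint64_t)t;          // zero-extended result, as for a->u
 *   }
 */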
2046 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
2047 {
2048 if (!sve_access_check(s)) {
2049 return true;
2050 }
2051
2052 unsigned fullsz = vec_full_reg_size(s);
2053 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2054 int inc = numelem * a->imm;
2055 TCGv_i64 reg = cpu_reg(s, a->rd);
2056
2057 if (inc != 0) {
2058 do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
2059 }
2060 return true;
2061 }
2063 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2064 {
2065 if (a->esz == 0) {
2066 return false;
2067 }
2068
2069 unsigned fullsz = vec_full_reg_size(s);
2070 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2071 int inc = numelem * a->imm;
2072
2073 if (inc != 0) {
2074 if (sve_access_check(s)) {
2075 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2076 vec_full_reg_offset(s, a->rn),
2077 tcg_constant_i64(a->d ? -inc : inc),
2078 fullsz, fullsz);
2079 }
2080 } else {
2081 do_mov_z(s, a->rd, a->rn);
2082 }
2083 return true;
2084 }
2085
2086 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2087 {
2088 if (a->esz == 0) {
2089 return false;
2090 }
2091
2092 unsigned fullsz = vec_full_reg_size(s);
2093 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2094 int inc = numelem * a->imm;
2095
2096 if (inc != 0) {
2097 if (sve_access_check(s)) {
2098 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
2099 tcg_constant_i64(inc), a->u, a->d);
2100 }
2101 } else {
2102 do_mov_z(s, a->rd, a->rn);
2103 }
2104 return true;
2105 }
2106
2107 /*
2108  *** SVE Bitwise Immediate Group
2109  */
2110
2111 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2112 {
2113 uint64_t imm;
2114 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2115 extract32(a->dbm, 0, 6),
2116 extract32(a->dbm, 6, 6))) {
2117 return false;
2118 }
2119 return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
2120 }
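/*
 * logic_imm_decode_wmask() expands the usual AArch64 (N, imms, immr)
 * bitmask-immediate encoding: a run of s+1 ones, rotated right by r within
 * an element of 2..64 bits and replicated across 64 bits.  A condensed,
 * stand-alone sketch (plain C, not the shared a64 decoder itself):
 *
 *   static bool decode_wmask(uint64_t *out, unsigned n, unsigned imms,
 *                            unsigned immr)
 *   {
 *       unsigned v = (n << 6) | (~imms & 0x3f);  // N:NOT(imms)
 *       if (v == 0) {
 *           return false;
 *       }
 *       unsigned len = 31 - __builtin_clz(v);
 *       unsigned e = 1u << len;                  // element size 2..64
 *       unsigned s = imms & (e - 1), r = immr & (e - 1);
 *       if (s == e - 1) {
 *           return false;                        // all-ones run is reserved
 *       }
 *       uint64_t emask = e == 64 ? ~0ull : (1ull << e) - 1;
 *       uint64_t w = (1ull << (s + 1)) - 1;      // s + 1 <= 63 here
 *       if (r) {                                 // rotate right in element
 *           w = ((w >> r) | (w << (e - r))) & emask;
 *       }
 *       for (unsigned i = e; i < 64; i *= 2) {   // replicate to 64 bits
 *           w |= w << i;
 *       }
 *       *out = w;
 *       return true;
 *   }
 */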
2122 TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
2123 TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
2124 TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
2126 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
2127 {
2128 uint64_t imm;
2129 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2130 extract32(a->dbm, 0, 6),
2131 extract32(a->dbm, 6, 6))) {
2132 return false;
2133 }
2134 if (sve_access_check(s)) {
2135 do_dupi_z(s, a->rd, imm);
2136 }
2137 return true;
2138 }
2139
2140 /*
2141  *** SVE Integer Wide Immediate - Predicated Group
2142  */
2144 /* Implement all merging copies. This is used for CPY (immediate),
2145 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2147 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2148 TCGv_i64 val)
2150 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2151 static gen_cpy * const fns[4] = {
2152 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2153 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2155 unsigned vsz = vec_full_reg_size(s);
2156 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2157 TCGv_ptr t_zd = tcg_temp_new_ptr();
2158 TCGv_ptr t_zn = tcg_temp_new_ptr();
2159 TCGv_ptr t_pg = tcg_temp_new_ptr();
2161 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2162 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2163 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2165 fns[esz](t_zd, t_zn, t_pg, val, desc);
2167 tcg_temp_free_ptr(t_zd);
2168 tcg_temp_free_ptr(t_zn);
2169 tcg_temp_free_ptr(t_pg);
2172 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2174 if (a->esz == 0) {
2175 return false;
2177 if (sve_access_check(s)) {
2178 /* Decode the VFP immediate. */
2179 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2180 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
2182 return true;
2185 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2187 if (sve_access_check(s)) {
2188 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
2190 return true;
2193 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2195 static gen_helper_gvec_2i * const fns[4] = {
2196 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2197 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2200 if (sve_access_check(s)) {
2201 unsigned vsz = vec_full_reg_size(s);
2202 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2203 pred_full_reg_offset(s, a->pg),
2204 tcg_constant_i64(a->imm),
2205 vsz, vsz, 0, fns[a->esz]);
2207 return true;
2211 *** SVE Permute Extract Group
2214 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
2215 {
2216 if (!sve_access_check(s)) {
2217 return true;
2218 }
2219
2220 unsigned vsz = vec_full_reg_size(s);
2221 unsigned n_ofs = imm >= vsz ? 0 : imm;
2222 unsigned n_siz = vsz - n_ofs;
2223 unsigned d = vec_full_reg_offset(s, rd);
2224 unsigned n = vec_full_reg_offset(s, rn);
2225 unsigned m = vec_full_reg_offset(s, rm);
2226
2227 /* Use host vector move insns if we have appropriate sizes
2228  * and no unfortunate overlap.
2229  */
2230 if (m != d
2231 && n_ofs == size_for_gvec(n_ofs)
2232 && n_siz == size_for_gvec(n_siz)
2233 && (d != n || n_siz <= n_ofs)) {
2234 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2235 if (n_ofs != 0) {
2236 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2237 }
2238 } else {
2239 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2240 }
2241 return true;
2242 }
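/*
 * EXT concatenates Zm:Zn and extracts vsz bytes starting at byte n_ofs of
 * Zn, so the fast path above is just two block moves.  A byte-level model
 * (assuming distinct source and destination buffers):
 *
 *   static void ext_model(uint8_t *d, const uint8_t *n, const uint8_t *m,
 *                         unsigned vsz, unsigned ofs)
 *   {
 *       for (unsigned i = 0; i < vsz; i++) {
 *           d[i] = ofs + i < vsz ? n[ofs + i] : m[ofs + i - vsz];
 *       }
 *   }
 */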
2244 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2245 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
2248 *** SVE Permute - Unpredicated Group
2251 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2256 vsz, vsz, cpu_reg_sp(s, a->rn));
2258 return true;
2261 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2263 if ((a->imm & 0x1f) == 0) {
2264 return false;
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned dofs = vec_full_reg_offset(s, a->rd);
2269 unsigned esz, index;
2271 esz = ctz32(a->imm);
2272 index = a->imm >> (esz + 1);
2274 if ((index << esz) < vsz) {
2275 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2276 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2277 } else {
2279 * While dup_mem handles 128-bit elements, dup_imm does not.
2280 * Thankfully element size doesn't matter for splatting zero.
2282 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2285 return true;
2288 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2290 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2291 static gen_insr * const fns[4] = {
2292 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2293 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2295 unsigned vsz = vec_full_reg_size(s);
2296 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2297 TCGv_ptr t_zd = tcg_temp_new_ptr();
2298 TCGv_ptr t_zn = tcg_temp_new_ptr();
2300 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2301 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2303 fns[a->esz](t_zd, t_zn, val, desc);
2305 tcg_temp_free_ptr(t_zd);
2306 tcg_temp_free_ptr(t_zn);
2309 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2311 if (sve_access_check(s)) {
2312 TCGv_i64 t = tcg_temp_new_i64();
2313 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2314 do_insr_i64(s, a, t);
2315 tcg_temp_free_i64(t);
2317 return true;
2320 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2322 if (sve_access_check(s)) {
2323 do_insr_i64(s, a, cpu_reg(s, a->rm));
2325 return true;
2328 static gen_helper_gvec_2 * const rev_fns[4] = {
2329 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2330 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2332 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2334 static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2335 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2336 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2338 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2340 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2341 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2342 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2344 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2345 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2347 static gen_helper_gvec_3 * const tbx_fns[4] = {
2348 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2349 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2351 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2353 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2355 static gen_helper_gvec_2 * const fns[4][2] = {
2356 { NULL, NULL },
2357 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2358 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2359 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2362 if (a->esz == 0) {
2363 return false;
2365 if (sve_access_check(s)) {
2366 unsigned vsz = vec_full_reg_size(s);
2367 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2368 vec_full_reg_offset(s, a->rn)
2369 + (a->h ? vsz / 2 : 0),
2370 vsz, vsz, 0, fns[a->esz][a->u]);
2372 return true;
2376 *** SVE Permute - Predicates Group
2379 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2380 gen_helper_gvec_3 *fn)
2382 if (!sve_access_check(s)) {
2383 return true;
2386 unsigned vsz = pred_full_reg_size(s);
2388 TCGv_ptr t_d = tcg_temp_new_ptr();
2389 TCGv_ptr t_n = tcg_temp_new_ptr();
2390 TCGv_ptr t_m = tcg_temp_new_ptr();
2391 uint32_t desc = 0;
2393 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2394 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2395 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2397 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2398 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2399 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2401 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
2403 tcg_temp_free_ptr(t_d);
2404 tcg_temp_free_ptr(t_n);
2405 tcg_temp_free_ptr(t_m);
2406 return true;
2409 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2410 gen_helper_gvec_2 *fn)
2412 if (!sve_access_check(s)) {
2413 return true;
2416 unsigned vsz = pred_full_reg_size(s);
2417 TCGv_ptr t_d = tcg_temp_new_ptr();
2418 TCGv_ptr t_n = tcg_temp_new_ptr();
2419 uint32_t desc = 0;
2421 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2422 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2424 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2425 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2426 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2428 fn(t_d, t_n, tcg_constant_i32(desc));
2430 tcg_temp_free_ptr(t_d);
2431 tcg_temp_free_ptr(t_n);
2432 return true;
2435 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2436 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2437 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2438 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2439 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2440 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2442 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2443 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2444 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
2447 *** SVE Permute - Interleaving Group
2450 static gen_helper_gvec_3 * const zip_fns[4] = {
2451 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2452 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2454 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2455 zip_fns[a->esz], a, 0)
2456 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2457 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2459 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2460 gen_helper_sve2_zip_q, a, 0)
2461 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2462 gen_helper_sve2_zip_q, a,
2463 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2465 static gen_helper_gvec_3 * const uzp_fns[4] = {
2466 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2467 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2470 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2471 uzp_fns[a->esz], a, 0)
2472 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2473 uzp_fns[a->esz], a, 1 << a->esz)
2475 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2476 gen_helper_sve2_uzp_q, a, 0)
2477 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2478 gen_helper_sve2_uzp_q, a, 16)
2480 static gen_helper_gvec_3 * const trn_fns[4] = {
2481 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2482 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2485 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2486 trn_fns[a->esz], a, 0)
2487 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2488 trn_fns[a->esz], a, 1 << a->esz)
2490 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2491 gen_helper_sve2_trn_q, a, 0)
2492 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2493 gen_helper_sve2_trn_q, a, 16)
2496 *** SVE Permute Vector - Predicated Group
2499 static gen_helper_gvec_3 * const compact_fns[4] = {
2500 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2502 TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
2504 /* Call the helper that computes the ARM LastActiveElement pseudocode
2505 * function, scaled by the element size. This includes the not found
2506 * indication; e.g. not found for esz=3 is -8.
2508 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2510 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2511 * round up, as we do elsewhere, because we need the exact size.
2513 TCGv_ptr t_p = tcg_temp_new_ptr();
2514 unsigned desc = 0;
2516 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2517 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2519 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2521 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2523 tcg_temp_free_ptr(t_p);
2526 /* Increment LAST to the offset of the next element in the vector,
2527 * wrapping around to 0.
2529 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2531 unsigned vsz = vec_full_reg_size(s);
2533 tcg_gen_addi_i32(last, last, 1 << esz);
2534 if (is_power_of_2(vsz)) {
2535 tcg_gen_andi_i32(last, last, vsz - 1);
2536 } else {
2537 TCGv_i32 max = tcg_constant_i32(vsz);
2538 TCGv_i32 zero = tcg_constant_i32(0);
2539 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2543 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2544 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2546 unsigned vsz = vec_full_reg_size(s);
2548 if (is_power_of_2(vsz)) {
2549 tcg_gen_andi_i32(last, last, vsz - 1);
2550 } else {
2551 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2552 TCGv_i32 zero = tcg_constant_i32(0);
2553 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2557 /* Load an unsigned element of ESZ from BASE+OFS. */
2558 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2560 TCGv_i64 r = tcg_temp_new_i64();
2562 switch (esz) {
2563 case 0:
2564 tcg_gen_ld8u_i64(r, base, ofs);
2565 break;
2566 case 1:
2567 tcg_gen_ld16u_i64(r, base, ofs);
2568 break;
2569 case 2:
2570 tcg_gen_ld32u_i64(r, base, ofs);
2571 break;
2572 case 3:
2573 tcg_gen_ld_i64(r, base, ofs);
2574 break;
2575 default:
2576 g_assert_not_reached();
2578 return r;
2581 /* Load an unsigned element of ESZ from RM[LAST]. */
2582 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2583 int rm, int esz)
2585 TCGv_ptr p = tcg_temp_new_ptr();
2586 TCGv_i64 r;
2588 /* Convert offset into vector into offset into ENV.
2589 * The final adjustment for the vector register base
2590 * is added via constant offset to the load.
2592 #if HOST_BIG_ENDIAN
2593 /* Adjust for element ordering. See vec_reg_offset. */
2594 if (esz < 3) {
2595 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2597 #endif
2598 tcg_gen_ext_i32_ptr(p, last);
2599 tcg_gen_add_ptr(p, p, cpu_env);
2601 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2602 tcg_temp_free_ptr(p);
2604 return r;
2607 /* Compute CLAST for a Zreg. */
2608 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2610 TCGv_i32 last;
2611 TCGLabel *over;
2612 TCGv_i64 ele;
2613 unsigned vsz, esz = a->esz;
2615 if (!sve_access_check(s)) {
2616 return true;
2619 last = tcg_temp_local_new_i32();
2620 over = gen_new_label();
2622 find_last_active(s, last, esz, a->pg);
2624 /* There is of course no movcond for a 2048-bit vector,
2625 * so we must branch over the actual store.
2627 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2629 if (!before) {
2630 incr_last_active(s, last, esz);
2633 ele = load_last_active(s, last, a->rm, esz);
2634 tcg_temp_free_i32(last);
2636 vsz = vec_full_reg_size(s);
2637 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2638 tcg_temp_free_i64(ele);
2640 /* If this insn used MOVPRFX, we may need a second move. */
2641 if (a->rd != a->rn) {
2642 TCGLabel *done = gen_new_label();
2643 tcg_gen_br(done);
2645 gen_set_label(over);
2646 do_mov_z(s, a->rd, a->rn);
2648 gen_set_label(done);
2649 } else {
2650 gen_set_label(over);
2652 return true;
2655 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2656 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2658 /* Compute CLAST for a scalar. */
2659 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2660 bool before, TCGv_i64 reg_val)
2662 TCGv_i32 last = tcg_temp_new_i32();
2663 TCGv_i64 ele, cmp;
2665 find_last_active(s, last, esz, pg);
2667 /* Extend the original value of last prior to incrementing. */
2668 cmp = tcg_temp_new_i64();
2669 tcg_gen_ext_i32_i64(cmp, last);
2671 if (!before) {
2672 incr_last_active(s, last, esz);
2675 /* The conceit here is that while last < 0 indicates not found, after
2676 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2677 * from which we can load garbage. We then discard the garbage with
2678 * a conditional move.
2680 ele = load_last_active(s, last, rm, esz);
2681 tcg_temp_free_i32(last);
2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2684 ele, reg_val);
2685
2686 tcg_temp_free_i64(cmp);
2687 tcg_temp_free_i64(ele);
2688 }
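/*
 * A scalar model of the movcond trick above, assuming zm[] holds the
 * vector's elements and 'last' is the element-scaled result of
 * find_last_active:
 *
 *   static uint64_t clast_model(uint64_t prev, const uint64_t *zm, int last)
 *   {
 *       // last < 0 means "no active element"; the generated code still
 *       // performs the load (from an adjacent, still-mapped slot in
 *       // CPUARMState) and then discards the value, avoiding a branch.
 *       uint64_t elt = zm[last < 0 ? 0 : last];
 *       return last >= 0 ? elt : prev;
 *   }
 */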
2690 /* Compute CLAST for a Vreg. */
2691 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2693 if (sve_access_check(s)) {
2694 int esz = a->esz;
2695 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2696 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2698 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2699 write_fp_dreg(s, a->rd, reg);
2700 tcg_temp_free_i64(reg);
2702 return true;
2705 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2706 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2708 /* Compute CLAST for a Xreg. */
2709 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2711 TCGv_i64 reg;
2713 if (!sve_access_check(s)) {
2714 return true;
2717 reg = cpu_reg(s, a->rd);
2718 switch (a->esz) {
2719 case 0:
2720 tcg_gen_ext8u_i64(reg, reg);
2721 break;
2722 case 1:
2723 tcg_gen_ext16u_i64(reg, reg);
2724 break;
2725 case 2:
2726 tcg_gen_ext32u_i64(reg, reg);
2727 break;
2728 case 3:
2729 break;
2730 default:
2731 g_assert_not_reached();
2734 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2735 return true;
2738 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2739 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2741 /* Compute LAST for a scalar. */
2742 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2743 int pg, int rm, bool before)
2745 TCGv_i32 last = tcg_temp_new_i32();
2746 TCGv_i64 ret;
2748 find_last_active(s, last, esz, pg);
2749 if (before) {
2750 wrap_last_active(s, last, esz);
2751 } else {
2752 incr_last_active(s, last, esz);
2755 ret = load_last_active(s, last, rm, esz);
2756 tcg_temp_free_i32(last);
2757 return ret;
2760 /* Compute LAST for a Vreg. */
2761 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2763 if (sve_access_check(s)) {
2764 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2765 write_fp_dreg(s, a->rd, val);
2766 tcg_temp_free_i64(val);
2768 return true;
2771 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2772 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2774 /* Compute LAST for a Xreg. */
2775 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2777 if (sve_access_check(s)) {
2778 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2779 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2780 tcg_temp_free_i64(val);
2782 return true;
2785 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2786 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2788 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2790 if (sve_access_check(s)) {
2791 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2793 return true;
2796 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2798 if (sve_access_check(s)) {
2799 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2800 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2801 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2802 tcg_temp_free_i64(t);
2804 return true;
2807 static gen_helper_gvec_3 * const revb_fns[4] = {
2808 NULL, gen_helper_sve_revb_h,
2809 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2811 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2813 static gen_helper_gvec_3 * const revh_fns[4] = {
2814 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2816 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2818 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2819 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
2821 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2822 gen_helper_sve_splice, a, a->esz)
2824 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2825 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
2828 *** SVE Integer Compare - Vectors Group
2831 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2832 gen_helper_gvec_flags_4 *gen_fn)
2834 TCGv_ptr pd, zn, zm, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2838 if (gen_fn == NULL) {
2839 return false;
2841 if (!sve_access_check(s)) {
2842 return true;
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_temp_new_i32();
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 zm = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2855 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2857 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
2859 tcg_temp_free_ptr(pd);
2860 tcg_temp_free_ptr(zn);
2861 tcg_temp_free_ptr(zm);
2862 tcg_temp_free_ptr(pg);
2864 do_pred_flags(t);
2866 tcg_temp_free_i32(t);
2867 return true;
2870 #define DO_PPZZ(NAME, name) \
2871 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2872 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2873 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2874 }; \
2875 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2876 a, name##_ppzz_fns[a->esz])
2878 DO_PPZZ(CMPEQ, cmpeq)
2879 DO_PPZZ(CMPNE, cmpne)
2880 DO_PPZZ(CMPGT, cmpgt)
2881 DO_PPZZ(CMPGE, cmpge)
2882 DO_PPZZ(CMPHI, cmphi)
2883 DO_PPZZ(CMPHS, cmphs)
2885 #undef DO_PPZZ
2887 #define DO_PPZW(NAME, name) \
2888 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2889 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2890 gen_helper_sve_##name##_ppzw_s, NULL \
2891 }; \
2892 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2893 a, name##_ppzw_fns[a->esz])
2895 DO_PPZW(CMPEQ, cmpeq)
2896 DO_PPZW(CMPNE, cmpne)
2897 DO_PPZW(CMPGT, cmpgt)
2898 DO_PPZW(CMPGE, cmpge)
2899 DO_PPZW(CMPHI, cmphi)
2900 DO_PPZW(CMPHS, cmphs)
2901 DO_PPZW(CMPLT, cmplt)
2902 DO_PPZW(CMPLE, cmple)
2903 DO_PPZW(CMPLO, cmplo)
2904 DO_PPZW(CMPLS, cmpls)
2906 #undef DO_PPZW
2909 *** SVE Integer Compare - Immediate Groups
2912 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2913 gen_helper_gvec_flags_3 *gen_fn)
2915 TCGv_ptr pd, zn, pg;
2916 unsigned vsz;
2917 TCGv_i32 t;
2919 if (gen_fn == NULL) {
2920 return false;
2922 if (!sve_access_check(s)) {
2923 return true;
2926 vsz = vec_full_reg_size(s);
2927 t = tcg_temp_new_i32();
2928 pd = tcg_temp_new_ptr();
2929 zn = tcg_temp_new_ptr();
2930 pg = tcg_temp_new_ptr();
2932 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2933 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2934 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2936 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
2938 tcg_temp_free_ptr(pd);
2939 tcg_temp_free_ptr(zn);
2940 tcg_temp_free_ptr(pg);
2942 do_pred_flags(t);
2944 tcg_temp_free_i32(t);
2945 return true;
2948 #define DO_PPZI(NAME, name) \
2949 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
2950 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2951 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2952 }; \
2953 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2954 name##_ppzi_fns[a->esz])
2956 DO_PPZI(CMPEQ, cmpeq)
2957 DO_PPZI(CMPNE, cmpne)
2958 DO_PPZI(CMPGT, cmpgt)
2959 DO_PPZI(CMPGE, cmpge)
2960 DO_PPZI(CMPHI, cmphi)
2961 DO_PPZI(CMPHS, cmphs)
2962 DO_PPZI(CMPLT, cmplt)
2963 DO_PPZI(CMPLE, cmple)
2964 DO_PPZI(CMPLO, cmplo)
2965 DO_PPZI(CMPLS, cmpls)
2967 #undef DO_PPZI
2970 *** SVE Partition Break Group
2973 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2974 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2976 if (!sve_access_check(s)) {
2977 return true;
2980 unsigned vsz = pred_full_reg_size(s);
2982 /* Predicate sizes may be smaller and cannot use simd_desc. */
2983 TCGv_ptr d = tcg_temp_new_ptr();
2984 TCGv_ptr n = tcg_temp_new_ptr();
2985 TCGv_ptr m = tcg_temp_new_ptr();
2986 TCGv_ptr g = tcg_temp_new_ptr();
2987 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2989 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2990 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2991 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2992 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2994 if (a->s) {
2995 TCGv_i32 t = tcg_temp_new_i32();
2996 fn_s(t, d, n, m, g, desc);
2997 do_pred_flags(t);
2998 tcg_temp_free_i32(t);
2999 } else {
3000 fn(d, n, m, g, desc);
3002 tcg_temp_free_ptr(d);
3003 tcg_temp_free_ptr(n);
3004 tcg_temp_free_ptr(m);
3005 tcg_temp_free_ptr(g);
3006 return true;
3009 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3010 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3012 if (!sve_access_check(s)) {
3013 return true;
3016 unsigned vsz = pred_full_reg_size(s);
3018 /* Predicate sizes may be smaller and cannot use simd_desc. */
3019 TCGv_ptr d = tcg_temp_new_ptr();
3020 TCGv_ptr n = tcg_temp_new_ptr();
3021 TCGv_ptr g = tcg_temp_new_ptr();
3022 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
3024 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3025 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3026 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3028 if (a->s) {
3029 TCGv_i32 t = tcg_temp_new_i32();
3030 fn_s(t, d, n, g, desc);
3031 do_pred_flags(t);
3032 tcg_temp_free_i32(t);
3033 } else {
3034 fn(d, n, g, desc);
3036 tcg_temp_free_ptr(d);
3037 tcg_temp_free_ptr(n);
3038 tcg_temp_free_ptr(g);
3039 return true;
3042 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3043 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3044 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3045 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3047 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3048 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3049 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3050 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3052 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3053 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3054 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3055 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3057 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3058 gen_helper_sve_brkn, gen_helper_sve_brkns)
3061 *** SVE Predicate Count Group
3064 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3066 unsigned psz = pred_full_reg_size(s);
3068 if (psz <= 8) {
3069 uint64_t psz_mask;
3071 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3072 if (pn != pg) {
3073 TCGv_i64 g = tcg_temp_new_i64();
3074 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3075 tcg_gen_and_i64(val, val, g);
3076 tcg_temp_free_i64(g);
3079 /* Reduce the pred_esz_masks value simply to reduce the
3080 * size of the code generated here.
3082 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3083 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3085 tcg_gen_ctpop_i64(val, val);
3086 } else {
3087 TCGv_ptr t_pn = tcg_temp_new_ptr();
3088 TCGv_ptr t_pg = tcg_temp_new_ptr();
3089 unsigned desc = 0;
3091 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3092 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
3094 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3095 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3096
3097 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
3098 tcg_temp_free_ptr(t_pn);
3099 tcg_temp_free_ptr(t_pg);
3100 }
3101 }
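/*
 * For the small-predicate fast path above, the whole predicate fits in one
 * uint64_t: mask down to the flag bit of each element and popcount.  A
 * host-side sketch (assuming esz indexes the same values as
 * pred_esz_masks):
 *
 *   static unsigned cntp_small(uint64_t pred, unsigned psz, unsigned esz)
 *   {
 *       static const uint64_t esz_mask[4] = {
 *           0xffffffffffffffffull, 0x5555555555555555ull,
 *           0x1111111111111111ull, 0x0101010101010101ull,
 *       };
 *       uint64_t live = psz == 8 ? ~0ull : (1ull << (psz * 8)) - 1;
 *       return __builtin_popcountll(pred & esz_mask[esz] & live);
 *   }
 */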
3103 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3105 if (sve_access_check(s)) {
3106 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3108 return true;
3111 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3113 if (sve_access_check(s)) {
3114 TCGv_i64 reg = cpu_reg(s, a->rd);
3115 TCGv_i64 val = tcg_temp_new_i64();
3117 do_cntp(s, val, a->esz, a->pg, a->pg);
3118 if (a->d) {
3119 tcg_gen_sub_i64(reg, reg, val);
3120 } else {
3121 tcg_gen_add_i64(reg, reg, val);
3123 tcg_temp_free_i64(val);
3125 return true;
3128 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3130 if (a->esz == 0) {
3131 return false;
3133 if (sve_access_check(s)) {
3134 unsigned vsz = vec_full_reg_size(s);
3135 TCGv_i64 val = tcg_temp_new_i64();
3136 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3138 do_cntp(s, val, a->esz, a->pg, a->pg);
3139 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3140 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3142 return true;
3145 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3147 if (sve_access_check(s)) {
3148 TCGv_i64 reg = cpu_reg(s, a->rd);
3149 TCGv_i64 val = tcg_temp_new_i64();
3151 do_cntp(s, val, a->esz, a->pg, a->pg);
3152 do_sat_addsub_32(reg, val, a->u, a->d);
3154 return true;
3157 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3159 if (sve_access_check(s)) {
3160 TCGv_i64 reg = cpu_reg(s, a->rd);
3161 TCGv_i64 val = tcg_temp_new_i64();
3163 do_cntp(s, val, a->esz, a->pg, a->pg);
3164 do_sat_addsub_64(reg, val, a->u, a->d);
3166 return true;
3169 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3171 if (a->esz == 0) {
3172 return false;
3174 if (sve_access_check(s)) {
3175 TCGv_i64 val = tcg_temp_new_i64();
3176 do_cntp(s, val, a->esz, a->pg, a->pg);
3177 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3179 return true;
3183 *** SVE Integer Compare Scalars Group
3186 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3188 if (!sve_access_check(s)) {
3189 return true;
3192 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3193 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3194 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3195 TCGv_i64 cmp = tcg_temp_new_i64();
3197 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3198 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3199 tcg_temp_free_i64(cmp);
3201 /* VF = !NF & !CF. */
3202 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3203 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3205 /* Both NF and VF actually look at bit 31. */
3206 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3207 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3208 return true;
3211 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3213 TCGv_i64 op0, op1, t0, t1, tmax;
3214 TCGv_i32 t2;
3215 TCGv_ptr ptr;
3216 unsigned vsz = vec_full_reg_size(s);
3217 unsigned desc = 0;
3218 TCGCond cond;
3219 uint64_t maxval;
3220 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3221 bool eq = a->eq == a->lt;
3223 /* The greater-than conditions are all SVE2. */
3224 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3225 return false;
3227 if (!sve_access_check(s)) {
3228 return true;
3231 op0 = read_cpu_reg(s, a->rn, 1);
3232 op1 = read_cpu_reg(s, a->rm, 1);
3234 if (!a->sf) {
3235 if (a->u) {
3236 tcg_gen_ext32u_i64(op0, op0);
3237 tcg_gen_ext32u_i64(op1, op1);
3238 } else {
3239 tcg_gen_ext32s_i64(op0, op0);
3240 tcg_gen_ext32s_i64(op1, op1);
3244 /* For the helper, compress the different conditions into a computation
3245 * of how many iterations for which the condition is true.
3247 t0 = tcg_temp_new_i64();
3248 t1 = tcg_temp_new_i64();
3250 if (a->lt) {
3251 tcg_gen_sub_i64(t0, op1, op0);
3252 if (a->u) {
3253 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3254 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3255 } else {
3256 maxval = a->sf ? INT64_MAX : INT32_MAX;
3257 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3259 } else {
3260 tcg_gen_sub_i64(t0, op0, op1);
3261 if (a->u) {
3262 maxval = 0;
3263 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3264 } else {
3265 maxval = a->sf ? INT64_MIN : INT32_MIN;
3266 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3270 tmax = tcg_constant_i64(vsz >> a->esz);
3271 if (eq) {
3272 /* Equality means one more iteration. */
3273 tcg_gen_addi_i64(t0, t0, 1);
3276 * For the less-than while, if op1 is maxval (and the only time
3277 * the addition above could overflow), then we produce an all-true
3278 * predicate by setting the count to the vector length. This is
3279 * because the pseudocode is described as an increment + compare
3280 * loop, and the maximum integer would always compare true.
3281 * Similarly, the greater-than while has the same issue with the
3282 * minimum integer due to the decrement + compare loop.
3284 tcg_gen_movi_i64(t1, maxval);
3285 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3288 /* Bound to the maximum. */
3289 tcg_gen_umin_i64(t0, t0, tmax);
3291 /* Set the count to zero if the condition is false. */
3292 tcg_gen_movi_i64(t1, 0);
3293 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3294 tcg_temp_free_i64(t1);
3296 /* Since we're bounded, pass as a 32-bit type. */
3297 t2 = tcg_temp_new_i32();
3298 tcg_gen_extrl_i64_i32(t2, t0);
3299 tcg_temp_free_i64(t0);
3301 /* Scale elements to bits. */
3302 tcg_gen_shli_i32(t2, t2, a->esz);
3304 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3305 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3307 ptr = tcg_temp_new_ptr();
3308 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3310 if (a->lt) {
3311 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3312 } else {
3313 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
3314 }
3315 do_pred_flags(t2);
3316
3317 tcg_temp_free_ptr(ptr);
3318 tcg_temp_free_i32(t2);
3319 return true;
3320 }
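/*
 * The arithmetic above reduces every WHILE* variant to "how many leading
 * elements satisfy the condition".  For WHILELT (signed, exclusive) the
 * count is simply a clamped difference; a sketch:
 *
 *   static uint64_t whilelt_count(int64_t rn, int64_t rm, uint64_t nelem)
 *   {
 *       if (rn >= rm) {
 *           return 0;                               // false from i == 0
 *       }
 *       uint64_t n = (uint64_t)rm - (uint64_t)rn;   // t0 = op1 - op0
 *       return n < nelem ? n : nelem;               // bound to the vector
 *   }
 */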
3322 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3324 TCGv_i64 op0, op1, diff, t1, tmax;
3325 TCGv_i32 t2;
3326 TCGv_ptr ptr;
3327 unsigned vsz = vec_full_reg_size(s);
3328 unsigned desc = 0;
3330 if (!dc_isar_feature(aa64_sve2, s)) {
3331 return false;
3333 if (!sve_access_check(s)) {
3334 return true;
3337 op0 = read_cpu_reg(s, a->rn, 1);
3338 op1 = read_cpu_reg(s, a->rm, 1);
3340 tmax = tcg_constant_i64(vsz);
3341 diff = tcg_temp_new_i64();
3343 if (a->rw) {
3344 /* WHILERW */
3345 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3346 t1 = tcg_temp_new_i64();
3347 tcg_gen_sub_i64(diff, op0, op1);
3348 tcg_gen_sub_i64(t1, op1, op0);
3349 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3350 tcg_temp_free_i64(t1);
3351 /* Round down to a multiple of ESIZE. */
3352 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3353 /* If op1 == op0, diff == 0, and the condition is always true. */
3354 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3355 } else {
3356 /* WHILEWR */
3357 tcg_gen_sub_i64(diff, op1, op0);
3358 /* Round down to a multiple of ESIZE. */
3359 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3360 /* If op0 >= op1, diff <= 0, the condition is always true. */
3361 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3364 /* Bound to the maximum. */
3365 tcg_gen_umin_i64(diff, diff, tmax);
3367 /* Since we're bounded, pass as a 32-bit type. */
3368 t2 = tcg_temp_new_i32();
3369 tcg_gen_extrl_i64_i32(t2, diff);
3370 tcg_temp_free_i64(diff);
3372 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3373 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3375 ptr = tcg_temp_new_ptr();
3376 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3378 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3379 do_pred_flags(t2);
3380
3381 tcg_temp_free_ptr(ptr);
3382 tcg_temp_free_i32(t2);
3383 return true;
3384 }
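/*
 * In other words, WHILEWR yields an all-true predicate unless the second
 * address lands within a vector's span above the first.  A sketch of the
 * count in bytes, matching op0/op1 above (esz rounding omitted):
 *
 *   static uint64_t whilewr_count(uint64_t op0, uint64_t op1, uint64_t vsz)
 *   {
 *       if (op0 >= op1) {
 *           return vsz;               // no forward overlap: all true
 *       }
 *       uint64_t diff = op1 - op0;
 *       return diff < vsz ? diff : vsz;
 *   }
 */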
3387 *** SVE Integer Wide Immediate - Unpredicated Group
3390 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3392 if (a->esz == 0) {
3393 return false;
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 int dofs = vec_full_reg_offset(s, a->rd);
3398 uint64_t imm;
3400 /* Decode the VFP immediate. */
3401 imm = vfp_expand_imm(a->esz, a->imm);
3402 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3404 return true;
3407 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3409 if (sve_access_check(s)) {
3410 unsigned vsz = vec_full_reg_size(s);
3411 int dofs = vec_full_reg_offset(s, a->rd);
3412 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3414 return true;
3417 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
3419 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3421 a->imm = -a->imm;
3422 return trans_ADD_zzi(s, a);
3425 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3427 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3428 static const GVecGen2s op[4] = {
3429 { .fni8 = tcg_gen_vec_sub8_i64,
3430 .fniv = tcg_gen_sub_vec,
3431 .fno = gen_helper_sve_subri_b,
3432 .opt_opc = vecop_list,
3433 .vece = MO_8,
3434 .scalar_first = true },
3435 { .fni8 = tcg_gen_vec_sub16_i64,
3436 .fniv = tcg_gen_sub_vec,
3437 .fno = gen_helper_sve_subri_h,
3438 .opt_opc = vecop_list,
3439 .vece = MO_16,
3440 .scalar_first = true },
3441 { .fni4 = tcg_gen_sub_i32,
3442 .fniv = tcg_gen_sub_vec,
3443 .fno = gen_helper_sve_subri_s,
3444 .opt_opc = vecop_list,
3445 .vece = MO_32,
3446 .scalar_first = true },
3447 { .fni8 = tcg_gen_sub_i64,
3448 .fniv = tcg_gen_sub_vec,
3449 .fno = gen_helper_sve_subri_d,
3450 .opt_opc = vecop_list,
3451 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3452 .vece = MO_64,
3453 .scalar_first = true }
3456 if (sve_access_check(s)) {
3457 unsigned vsz = vec_full_reg_size(s);
3458 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3459 vec_full_reg_offset(s, a->rn),
3460 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
3462 return true;
3465 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3467 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3469 if (sve_access_check(s)) {
3470 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3471 tcg_constant_i64(a->imm), u, d);
3473 return true;
3476 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3477 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3478 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3479 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3481 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3483 if (sve_access_check(s)) {
3484 unsigned vsz = vec_full_reg_size(s);
3485 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3486 vec_full_reg_offset(s, a->rn),
3487 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3489 return true;
3492 #define DO_ZZI(NAME, name) \
3493 static gen_helper_gvec_2i * const name##i_fns[4] = { \
3494 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3495 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3496 }; \
3497 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
3499 DO_ZZI(SMAX, smax)
3500 DO_ZZI(UMAX, umax)
3501 DO_ZZI(SMIN, smin)
3502 DO_ZZI(UMIN, umin)
3504 #undef DO_ZZI
3506 static gen_helper_gvec_4 * const dot_fns[2][2] = {
3507 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3508 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3510 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3511 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
3514 * SVE Multiply - Indexed
3517 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3518 gen_helper_gvec_sdot_idx_b, a)
3519 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3520 gen_helper_gvec_sdot_idx_h, a)
3521 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3522 gen_helper_gvec_udot_idx_b, a)
3523 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3524 gen_helper_gvec_udot_idx_h, a)
3526 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3527 gen_helper_gvec_sudot_idx_b, a)
3528 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3529 gen_helper_gvec_usdot_idx_b, a)
3531 #define DO_SVE2_RRX(NAME, FUNC) \
3532 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3533 a->rd, a->rn, a->rm, a->index)
3535 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3536 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3537 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3539 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3540 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3541 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3543 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3544 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3545 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3547 #undef DO_SVE2_RRX
3549 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3550 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3551 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3553 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3554 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3555 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3556 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3558 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3559 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3560 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3561 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3563 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3564 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3565 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3566 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3568 #undef DO_SVE2_RRX_TB
3570 #define DO_SVE2_RRXR(NAME, FUNC) \
3571 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
3573 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3574 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3575 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
3577 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3578 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3579 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
3581 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3582 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3583 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
3585 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3586 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3587 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
3589 #undef DO_SVE2_RRXR
3591 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
3592 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3593 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3595 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3596 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3597 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3598 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3600 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3601 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3602 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3603 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3605 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3606 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3607 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3608 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3610 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3611 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3612 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3613 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3615 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3616 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3617 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3618 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3620 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3621 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3622 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3623 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
3625 #undef DO_SVE2_RRXR_TB
3627 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \
3628 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3629 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3631 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3632 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3634 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3635 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3637 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3638 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3640 #undef DO_SVE2_RRXR_ROT
3643 *** SVE Floating Point Multiply-Add Indexed Group
3646 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
3648 static gen_helper_gvec_4_ptr * const fns[4] = {
3649 NULL,
3650 gen_helper_gvec_fmla_idx_h,
3651 gen_helper_gvec_fmla_idx_s,
3652 gen_helper_gvec_fmla_idx_d,
3654 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3655 (a->index << 1) | sub,
3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3659 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3660 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
3663 *** SVE Floating Point Multiply Indexed Group
3666 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3667 NULL, gen_helper_gvec_fmul_idx_h,
3668 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3670 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3671 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3672 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3675 *** SVE Floating Point Fast Reduction Group
3678 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3679 TCGv_ptr, TCGv_i32);
3681 static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3682 gen_helper_fp_reduce *fn)
3684 unsigned vsz, p2vsz;
3685 TCGv_i32 t_desc;
3686 TCGv_ptr t_zn, t_pg, status;
3687 TCGv_i64 temp;
3689 if (fn == NULL) {
3690 return false;
3692 if (!sve_access_check(s)) {
3693 return true;
3696 vsz = vec_full_reg_size(s);
3697 p2vsz = pow2ceil(vsz);
3698 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3699 temp = tcg_temp_new_i64();
3700 t_zn = tcg_temp_new_ptr();
3701 t_pg = tcg_temp_new_ptr();
3703 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3704 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3705 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3707 fn(temp, t_zn, t_pg, status, t_desc);
3708 tcg_temp_free_ptr(t_zn);
3709 tcg_temp_free_ptr(t_pg);
3710 tcg_temp_free_ptr(status);
3711
3712 write_fp_dreg(s, a->rd, temp);
3713 tcg_temp_free_i64(temp);
3714 return true;
3715 }
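/*
 * p2vsz rounds the operation size up to a power of two because the
 * reduction helpers work as a balanced binary tree, with the slack treated
 * as identity elements.  A float32 model of such a tree sum (illustrative
 * only; n must be a power of two, tail padded with the identity):
 *
 *   static float tree_fadd(const float *v, unsigned n)
 *   {
 *       if (n == 1) {
 *           return v[0];
 *       }
 *       float tmp[n / 2];                       // C99 VLA, sketch only
 *       for (unsigned i = 0; i < n / 2; i++) {
 *           tmp[i] = v[2 * i] + v[2 * i + 1];
 *       }
 *       return tree_fadd(tmp, n / 2);
 *   }
 */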
3717 #define DO_VPZ(NAME, name) \
3718 static gen_helper_fp_reduce * const name##_fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
3721 }; \
3722 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
3724 DO_VPZ(FADDV, faddv)
3725 DO_VPZ(FMINNMV, fminnmv)
3726 DO_VPZ(FMAXNMV, fmaxnmv)
3727 DO_VPZ(FMINV, fminv)
3728 DO_VPZ(FMAXV, fmaxv)
3730 #undef DO_VPZ
3733 *** SVE Floating Point Unary Operations - Unpredicated Group
3736 static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3737 NULL, gen_helper_gvec_frecpe_h,
3738 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3740 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3742 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3743 NULL, gen_helper_gvec_frsqrte_h,
3744 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3746 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
3749 *** SVE Floating Point Compare with Zero Group
3752 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3753 gen_helper_gvec_3_ptr *fn)
3755 if (fn == NULL) {
3756 return false;
3758 if (sve_access_check(s)) {
3759 unsigned vsz = vec_full_reg_size(s);
3760 TCGv_ptr status =
3761 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3763 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3764 vec_full_reg_offset(s, a->rn),
3765 pred_full_reg_offset(s, a->pg),
3766 status, vsz, vsz, 0, fn);
3767 tcg_temp_free_ptr(status);
3769 return true;
3772 #define DO_PPZ(NAME, name) \
3773 static gen_helper_gvec_3_ptr * const name##_fns[] = { \
3774 NULL, gen_helper_sve_##name##_h, \
3775 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
3776 }; \
3777 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
3779 DO_PPZ(FCMGE_ppz0, fcmge0)
3780 DO_PPZ(FCMGT_ppz0, fcmgt0)
3781 DO_PPZ(FCMLE_ppz0, fcmle0)
3782 DO_PPZ(FCMLT_ppz0, fcmlt0)
3783 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3784 DO_PPZ(FCMNE_ppz0, fcmne0)
3786 #undef DO_PPZ
3789 *** SVE floating-point trig multiply-add coefficient
3792 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3793 NULL, gen_helper_sve_ftmad_h,
3794 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3796 TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3797 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3798 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3801 *** SVE Floating Point Accumulating Reduction Group
3804 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3806 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3807 TCGv_ptr, TCGv_ptr, TCGv_i32);
3808 static fadda_fn * const fns[3] = {
3809 gen_helper_sve_fadda_h,
3810 gen_helper_sve_fadda_s,
3811 gen_helper_sve_fadda_d,
3813 unsigned vsz = vec_full_reg_size(s);
3814 TCGv_ptr t_rm, t_pg, t_fpst;
3815 TCGv_i64 t_val;
3816 TCGv_i32 t_desc;
3818 if (a->esz == 0) {
3819 return false;
3821 if (!sve_access_check(s)) {
3822 return true;
3825 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3826 t_rm = tcg_temp_new_ptr();
3827 t_pg = tcg_temp_new_ptr();
3828 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3829 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3830 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3831 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3833 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3835 tcg_temp_free_ptr(t_fpst);
3836 tcg_temp_free_ptr(t_pg);
3837 tcg_temp_free_ptr(t_rm);
3838
3839 write_fp_dreg(s, a->rd, t_val);
3840 tcg_temp_free_i64(t_val);
3841 return true;
3842 }
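/*
 * Unlike the tree-shaped FADDV above, FADDA is an ordered reduction: the
 * rounding of each step depends on the running total, so elements must be
 * accumulated strictly left to right.  Model, with pg[] as one flag per
 * element:
 *
 *   static float fadda_model(float acc, const float *zm,
 *                            const bool *pg, unsigned n)
 *   {
 *       for (unsigned i = 0; i < n; i++) {
 *           if (pg[i]) {
 *               acc += zm[i];
 *           }
 *       }
 *       return acc;
 *   }
 */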
3845 *** SVE Floating Point Arithmetic - Unpredicated Group
3848 #define DO_FP3(NAME, name) \
3849 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
3850 NULL, gen_helper_gvec_##name##_h, \
3851 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3852 }; \
3853 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3855 DO_FP3(FADD_zzz, fadd)
3856 DO_FP3(FSUB_zzz, fsub)
3857 DO_FP3(FMUL_zzz, fmul)
3858 DO_FP3(FTSMUL, ftsmul)
3859 DO_FP3(FRECPS, recps)
3860 DO_FP3(FRSQRTS, rsqrts)
3862 #undef DO_FP3
3865 *** SVE Floating Point Arithmetic - Predicated Group
3868 #define DO_ZPZZ_FP(NAME, FEAT, name) \
3869 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3870 NULL, gen_helper_##name##_h, \
3871 gen_helper_##name##_s, gen_helper_##name##_d \
3872 }; \
3873 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3875 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3876 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3877 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3878 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3879 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3880 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3881 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3882 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3883 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3884 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3885 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
3887 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3888 TCGv_i64, TCGv_ptr, TCGv_i32);
3890 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3891 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3893 unsigned vsz = vec_full_reg_size(s);
3894 TCGv_ptr t_zd, t_zn, t_pg, status;
3895 TCGv_i32 desc;
3897 t_zd = tcg_temp_new_ptr();
3898 t_zn = tcg_temp_new_ptr();
3899 t_pg = tcg_temp_new_ptr();
3900 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3901 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3902 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3904 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3905 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3906 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3908 tcg_temp_free_ptr(status);
3909 tcg_temp_free_ptr(t_pg);
3910 tcg_temp_free_ptr(t_zn);
3911 tcg_temp_free_ptr(t_zd);
3914 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3915 gen_helper_sve_fp2scalar *fn)
3917 if (fn == NULL) {
3918 return false;
3920 if (sve_access_check(s)) {
3921 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3922 tcg_constant_i64(imm), fn);
3924 return true;
3927 #define DO_FP_IMM(NAME, name, const0, const1) \
3928 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
3929 NULL, gen_helper_sve_##name##_h, \
3930 gen_helper_sve_##name##_s, \
3931 gen_helper_sve_##name##_d \
3932 }; \
3933 static uint64_t const name##_const[4][2] = { \
3934 { -1, -1 }, \
3935 { float16_##const0, float16_##const1 }, \
3936 { float32_##const0, float32_##const1 }, \
3937 { float64_##const0, float64_##const1 }, \
3938 }; \
3939 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
3940 name##_const[a->esz][a->imm], name##_fns[a->esz])
3942 DO_FP_IMM(FADD, fadds, half, one)
3943 DO_FP_IMM(FSUB, fsubs, half, one)
3944 DO_FP_IMM(FMUL, fmuls, half, two)
3945 DO_FP_IMM(FSUBR, fsubrs, half, one)
3946 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3947 DO_FP_IMM(FMINNM, fminnms, zero, one)
3948 DO_FP_IMM(FMAX, fmaxs, zero, one)
3949 DO_FP_IMM(FMIN, fmins, zero, one)
3951 #undef DO_FP_IMM
3953 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3954 gen_helper_gvec_4_ptr *fn)
3956 if (fn == NULL) {
3957 return false;
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
3961 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3962 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 pred_full_reg_offset(s, a->pg),
3966 status, vsz, vsz, 0, fn);
3967 tcg_temp_free_ptr(status);
3969 return true;
3972 #define DO_FPCMP(NAME, name) \
3973 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
3974 NULL, gen_helper_sve_##name##_h, \
3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3976 }; \
3977 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
3979 DO_FPCMP(FCMGE, fcmge)
3980 DO_FPCMP(FCMGT, fcmgt)
3981 DO_FPCMP(FCMEQ, fcmeq)
3982 DO_FPCMP(FCMNE, fcmne)
3983 DO_FPCMP(FCMUO, fcmuo)
3984 DO_FPCMP(FACGE, facge)
3985 DO_FPCMP(FACGT, facgt)
3987 #undef DO_FPCMP
3989 static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
3990 NULL, gen_helper_sve_fcadd_h,
3991 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
3993 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
3994 a->rd, a->rn, a->rm, a->pg, a->rot,
3995 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3997 #define DO_FMLA(NAME, name) \
3998 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
3999 NULL, gen_helper_sve_##name##_h, \
4000 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4001 }; \
4002 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
4003 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
4004 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4006 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4007 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4008 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4009 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4011 #undef DO_FMLA
4013 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
4014 NULL, gen_helper_sve_fcmla_zpzzz_h,
4015 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
4017 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
4018 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
4019 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4021 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4023 static gen_helper_gvec_4_ptr * const fns[4] = {
4024 NULL,
4025 gen_helper_gvec_fcmlah_idx,
4026 gen_helper_gvec_fcmlas_idx,
4027 NULL,
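    /*
     * The 2-bit rotation occupies the low bits of the simd data
     * field, with the index packed above it.
     */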
4030 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
4031 a->index * 4 + a->rot,
4032 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4036 *** SVE Floating Point Unary Operations Predicated Group
4039 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4040 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
4041 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4042 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
4044 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
4045 gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
4047 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4048 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
4049 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4050 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
4051 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4052 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
4053 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4054 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
4056 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4057 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
4058 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4059 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
4060 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4061 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
4062 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4063 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
4064 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4065 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
4066 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4067 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
4069 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4070 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
4071 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4072 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
4073 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4074 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
4075 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4076 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
4077 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4078 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
4079 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4080 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
4082 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4083 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
4084 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4085 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
4087 static gen_helper_gvec_3_ptr * const frint_fns[] = {
4088 NULL,
4089 gen_helper_sve_frint_h,
4090 gen_helper_sve_frint_s,
4091 gen_helper_sve_frint_d
4093 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
4094 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4096 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4097 NULL,
4098 gen_helper_sve_frintx_h,
4099 gen_helper_sve_frintx_s,
4100 gen_helper_sve_frintx_d
4102 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4103            a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4105 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4106 int mode, gen_helper_gvec_3_ptr *fn)
4108 unsigned vsz;
4109 TCGv_i32 tmode;
4110 TCGv_ptr status;
4112 if (fn == NULL) {
4113 return false;
4115 if (!sve_access_check(s)) {
4116 return true;
4119 vsz = vec_full_reg_size(s);
4120 tmode = tcg_const_i32(mode);
4121 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
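    /* Install the new rounding mode, saving the old mode in tmode. */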
4123 gen_helper_set_rmode(tmode, tmode, status);
4125 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4126 vec_full_reg_offset(s, a->rn),
4127 pred_full_reg_offset(s, a->pg),
4128 status, vsz, vsz, 0, fn);
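    /* Restore the original rounding mode. */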
4130 gen_helper_set_rmode(tmode, tmode, status);
4131 tcg_temp_free_i32(tmode);
4132 tcg_temp_free_ptr(status);
4133 return true;
4136 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4137 float_round_nearest_even, frint_fns[a->esz])
4138 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4139 float_round_up, frint_fns[a->esz])
4140 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4141 float_round_down, frint_fns[a->esz])
4142 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4143 float_round_to_zero, frint_fns[a->esz])
4144 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4145 float_round_ties_away, frint_fns[a->esz])
4147 static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4148 NULL, gen_helper_sve_frecpx_h,
4149 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4151 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4152 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4154 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4155 NULL, gen_helper_sve_fsqrt_h,
4156 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4158 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4159 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4161 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4162 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4163 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4164 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4165 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4166 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4168 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4169 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4170 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4171 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4173 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4174 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4175 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4176 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4178 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4179 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4180 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4181 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4182 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4183 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4185 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4186 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4187 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4188 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4189 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4190 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4192 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4193 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
4196 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4199 /* Subroutine loading a vector register at VOFS of LEN bytes.
4200 * The load should begin at the address Rn + IMM.
4203 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4205 int len_align = QEMU_ALIGN_DOWN(len, 8);
4206 int len_remain = len % 8;
4207 int nparts = len / 8 + ctpop8(len_remain);
4208 int midx = get_mem_index(s);
4209 TCGv_i64 dirty_addr, clean_addr, t0, t1;
4211 dirty_addr = tcg_temp_new_i64();
4212 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4213 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
4214 tcg_temp_free_i64(dirty_addr);
4217 * Note that unpredicated load/store of vector/predicate registers
4218 * are defined as a stream of bytes, which equates to little-endian
4219 * operations on larger quantities.
4220 * Attempt to keep code expansion to a minimum by limiting the
4221 * amount of unrolling done.
4223 if (nparts <= 4) {
4224 int i;
4226 t0 = tcg_temp_new_i64();
4227 for (i = 0; i < len_align; i += 8) {
4228 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
4229 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4230 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4232 tcg_temp_free_i64(t0);
4233 } else {
4234 TCGLabel *loop = gen_new_label();
4235 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4237 /* Copy the clean address into a local temp, live across the loop. */
4238 t0 = clean_addr;
4239 clean_addr = new_tmp_a64_local(s);
4240 tcg_gen_mov_i64(clean_addr, t0);
4242 gen_set_label(loop);
4244 t0 = tcg_temp_new_i64();
4245 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
4246 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4248 tp = tcg_temp_new_ptr();
4249 tcg_gen_add_ptr(tp, cpu_env, i);
4250 tcg_gen_addi_ptr(i, i, 8);
4251 tcg_gen_st_i64(t0, tp, vofs);
4252 tcg_temp_free_ptr(tp);
4253 tcg_temp_free_i64(t0);
4255 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4256 tcg_temp_free_ptr(i);
4260 * Predicate register loads can be any multiple of 2.
4261 * Note that we still store the entire 64-bit unit into cpu_env.
4263 if (len_remain) {
4264 t0 = tcg_temp_new_i64();
4265 switch (len_remain) {
4266 case 2:
4267 case 4:
4268 case 8:
4269 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4270 MO_LE | ctz32(len_remain));
4271 break;
4273 case 6:
4274 t1 = tcg_temp_new_i64();
4275 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4276 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4277 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4278 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4279 tcg_temp_free_i64(t1);
4280 break;
4282 default:
4283 g_assert_not_reached();
4285 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4286 tcg_temp_free_i64(t0);
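/*
 * Worked example (illustrative): with VL = 384 bits, a predicate
 * register is 6 bytes, so len_align = 0 and len_remain = 6; the whole
 * load is the 4-byte plus 2-byte sequence merged by the deposit above.
 * A 48-byte vector register instead gives nparts = 6 and takes the
 * out-of-line loop rather than the unrolled path.
 */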
4290 /* Similarly for stores. */
4291 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4293 int len_align = QEMU_ALIGN_DOWN(len, 8);
4294 int len_remain = len % 8;
4295 int nparts = len / 8 + ctpop8(len_remain);
4296 int midx = get_mem_index(s);
4297 TCGv_i64 dirty_addr, clean_addr, t0;
4299 dirty_addr = tcg_temp_new_i64();
4300 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4301 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
4302 tcg_temp_free_i64(dirty_addr);
4304 /* Note that unpredicated load/store of vector/predicate registers
4305 * are defined as a stream of bytes, which equates to little-endian
4306 * operations on larger quantities. There is no nice way to force
4307 * a little-endian store for aarch64_be-linux-user out of line.
4309 * Attempt to keep code expansion to a minimum by limiting the
4310 * amount of unrolling done.
4312 if (nparts <= 4) {
4313 int i;
4315 t0 = tcg_temp_new_i64();
4316 for (i = 0; i < len_align; i += 8) {
4317 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4318 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
4319 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4321 tcg_temp_free_i64(t0);
4322 } else {
4323 TCGLabel *loop = gen_new_label();
4324 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4326 /* Copy the clean address into a local temp, live across the loop. */
4327 t0 = clean_addr;
4328 clean_addr = new_tmp_a64_local(s);
4329 tcg_gen_mov_i64(clean_addr, t0);
4331 gen_set_label(loop);
4333 t0 = tcg_temp_new_i64();
4334 tp = tcg_temp_new_ptr();
4335 tcg_gen_add_ptr(tp, cpu_env, i);
4336 tcg_gen_ld_i64(t0, tp, vofs);
4337 tcg_gen_addi_ptr(i, i, 8);
4338 tcg_temp_free_ptr(tp);
4340 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
4341 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4342 tcg_temp_free_i64(t0);
4344 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4345 tcg_temp_free_ptr(i);
4348 /* Predicate register stores can be any multiple of 2. */
4349 if (len_remain) {
4350 t0 = tcg_temp_new_i64();
4351 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4353 switch (len_remain) {
4354 case 2:
4355 case 4:
4356 case 8:
4357 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4358 MO_LE | ctz32(len_remain));
4359 break;
4361 case 6:
4362 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4363 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4364 tcg_gen_shri_i64(t0, t0, 32);
4365 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4366 break;
4368 default:
4369 g_assert_not_reached();
4371 tcg_temp_free_i64(t0);
4375 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4377 if (sve_access_check(s)) {
4378 int size = vec_full_reg_size(s);
4379 int off = vec_full_reg_offset(s, a->rd);
4380 do_ldr(s, off, size, a->rn, a->imm * size);
4382 return true;
4385 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4387 if (sve_access_check(s)) {
4388 int size = pred_full_reg_size(s);
4389 int off = pred_full_reg_offset(s, a->rd);
4390 do_ldr(s, off, size, a->rn, a->imm * size);
4392 return true;
4395 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4397 if (sve_access_check(s)) {
4398 int size = vec_full_reg_size(s);
4399 int off = vec_full_reg_offset(s, a->rd);
4400 do_str(s, off, size, a->rn, a->imm * size);
4402 return true;
4405 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4407 if (sve_access_check(s)) {
4408 int size = pred_full_reg_size(s);
4409 int off = pred_full_reg_offset(s, a->rd);
4410 do_str(s, off, size, a->rn, a->imm * size);
4412 return true;
4416 *** SVE Memory - Contiguous Load Group
4419 /* The memory mode of the dtype. */
4420 static const MemOp dtype_mop[16] = {
4421 MO_UB, MO_UB, MO_UB, MO_UB,
4422 MO_SL, MO_UW, MO_UW, MO_UW,
4423 MO_SW, MO_SW, MO_UL, MO_UL,
4424 MO_SB, MO_SB, MO_SB, MO_UQ
4427 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4429 /* The vector element size of dtype. */
4430 static const uint8_t dtype_esz[16] = {
4431 0, 1, 2, 3,
4432 3, 1, 2, 3,
4433 3, 2, 2, 3,
4434 3, 2, 1, 3
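/*
 * Worked example (illustrative): dtype 4 encodes LD1SW into 64-bit
 * elements, so dtype_mop[4] = MO_SL (a sign-extending 32-bit load),
 * dtype_msz(4) = MO_32 and dtype_esz[4] = 3.  For dtype 5, LD1H,
 * memory access size and element size are both 16 bits.
 */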
4437 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4438 int dtype, uint32_t mte_n, bool is_write,
4439 gen_helper_gvec_mem *fn)
4441 unsigned vsz = vec_full_reg_size(s);
4442 TCGv_ptr t_pg;
4443 int desc = 0;
4446 * For e.g. LD4, there are not enough arguments to pass all 4
4447 * registers as pointers, so encode the regno into the data field.
4448 * For consistency, do this even for LD1.
4450 if (s->mte_active[0]) {
4451 int msz = dtype_msz(dtype);
4453 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4454 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4455 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4456 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4457 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
4458 desc <<= SVE_MTEDESC_SHIFT;
4459 } else {
4460 addr = clean_data_tbi(s, addr);
4463 desc = simd_desc(vsz, vsz, zt | desc);
4464 t_pg = tcg_temp_new_ptr();
4466 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4467 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
4469 tcg_temp_free_ptr(t_pg);
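/*
 * Illustrative layout of the descriptor built above: the data field
 * of simd_desc() carries zt in its low bits and, when MTE is active,
 * the MTE descriptor above it, i.e.
 *
 *     data = zt | (mtedesc << SVE_MTEDESC_SHIFT)
 */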
4472 /* Indexed by [mte][be][dtype][nreg] */
4473 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4474 { /* mte inactive, little-endian */
4475 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4476 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4477 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4478 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4479 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4481 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4482 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4483 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4484 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4485 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4487 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4488 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4489 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4490 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4491 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4493 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4494 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4495 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4496 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4497 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4499 /* mte inactive, big-endian */
4500 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4501 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4502 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4503 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4504 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4506 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4507 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4508 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4509 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4510 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4512 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4513 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4514 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4515 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4516 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4518 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4519 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4520 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4521 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4522 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4524 { /* mte active, little-endian */
4525 { { gen_helper_sve_ld1bb_r_mte,
4526 gen_helper_sve_ld2bb_r_mte,
4527 gen_helper_sve_ld3bb_r_mte,
4528 gen_helper_sve_ld4bb_r_mte },
4529 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4530 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4531 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4533 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4534 { gen_helper_sve_ld1hh_le_r_mte,
4535 gen_helper_sve_ld2hh_le_r_mte,
4536 gen_helper_sve_ld3hh_le_r_mte,
4537 gen_helper_sve_ld4hh_le_r_mte },
4538 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4539 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4541 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4542 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4543 { gen_helper_sve_ld1ss_le_r_mte,
4544 gen_helper_sve_ld2ss_le_r_mte,
4545 gen_helper_sve_ld3ss_le_r_mte,
4546 gen_helper_sve_ld4ss_le_r_mte },
4547 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4549 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4550 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4551 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4552 { gen_helper_sve_ld1dd_le_r_mte,
4553 gen_helper_sve_ld2dd_le_r_mte,
4554 gen_helper_sve_ld3dd_le_r_mte,
4555 gen_helper_sve_ld4dd_le_r_mte } },
4557 /* mte active, big-endian */
4558 { { gen_helper_sve_ld1bb_r_mte,
4559 gen_helper_sve_ld2bb_r_mte,
4560 gen_helper_sve_ld3bb_r_mte,
4561 gen_helper_sve_ld4bb_r_mte },
4562 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4563 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4564 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4566 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4567 { gen_helper_sve_ld1hh_be_r_mte,
4568 gen_helper_sve_ld2hh_be_r_mte,
4569 gen_helper_sve_ld3hh_be_r_mte,
4570 gen_helper_sve_ld4hh_be_r_mte },
4571 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4572 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4574 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4575 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1ss_be_r_mte,
4577 gen_helper_sve_ld2ss_be_r_mte,
4578 gen_helper_sve_ld3ss_be_r_mte,
4579 gen_helper_sve_ld4ss_be_r_mte },
4580 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4582 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4583 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4584 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4585 { gen_helper_sve_ld1dd_be_r_mte,
4586 gen_helper_sve_ld2dd_be_r_mte,
4587 gen_helper_sve_ld3dd_be_r_mte,
4588 gen_helper_sve_ld4dd_be_r_mte } } },
4591 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4592 TCGv_i64 addr, int dtype, int nreg)
4594 gen_helper_gvec_mem *fn
4595 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
4598 * While there are holes in the table, they are not
4599 * accessible via the instruction encoding.
4601 assert(fn != NULL);
4602 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
4605 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4607 if (a->rm == 31) {
4608 return false;
4610 if (sve_access_check(s)) {
4611 TCGv_i64 addr = new_tmp_a64(s);
4612 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4613 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4614 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4616 return true;
4619 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4621 if (sve_access_check(s)) {
4622 int vsz = vec_full_reg_size(s);
4623 int elements = vsz >> dtype_esz[a->dtype];
4624 TCGv_i64 addr = new_tmp_a64(s);
4626 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4627 (a->imm * elements * (a->nreg + 1))
4628 << dtype_msz(a->dtype));
4629 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4631 return true;
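/*
 * Worked example (illustrative): LD2H with a->imm = 1 at VL = 256
 * bits gives vsz = 32 and elements = 16, so the offset computed above
 * is (1 * 16 * 2) << 1 = 64 bytes: the immediate is scaled by the
 * size of the whole 2-register transfer.
 */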
4634 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4636 static gen_helper_gvec_mem * const fns[2][2][16] = {
4637 { /* mte inactive, little-endian */
4638 { gen_helper_sve_ldff1bb_r,
4639 gen_helper_sve_ldff1bhu_r,
4640 gen_helper_sve_ldff1bsu_r,
4641 gen_helper_sve_ldff1bdu_r,
4643 gen_helper_sve_ldff1sds_le_r,
4644 gen_helper_sve_ldff1hh_le_r,
4645 gen_helper_sve_ldff1hsu_le_r,
4646 gen_helper_sve_ldff1hdu_le_r,
4648 gen_helper_sve_ldff1hds_le_r,
4649 gen_helper_sve_ldff1hss_le_r,
4650 gen_helper_sve_ldff1ss_le_r,
4651 gen_helper_sve_ldff1sdu_le_r,
4653 gen_helper_sve_ldff1bds_r,
4654 gen_helper_sve_ldff1bss_r,
4655 gen_helper_sve_ldff1bhs_r,
4656 gen_helper_sve_ldff1dd_le_r },
4658 /* mte inactive, big-endian */
4659 { gen_helper_sve_ldff1bb_r,
4660 gen_helper_sve_ldff1bhu_r,
4661 gen_helper_sve_ldff1bsu_r,
4662 gen_helper_sve_ldff1bdu_r,
4664 gen_helper_sve_ldff1sds_be_r,
4665 gen_helper_sve_ldff1hh_be_r,
4666 gen_helper_sve_ldff1hsu_be_r,
4667 gen_helper_sve_ldff1hdu_be_r,
4669 gen_helper_sve_ldff1hds_be_r,
4670 gen_helper_sve_ldff1hss_be_r,
4671 gen_helper_sve_ldff1ss_be_r,
4672 gen_helper_sve_ldff1sdu_be_r,
4674 gen_helper_sve_ldff1bds_r,
4675 gen_helper_sve_ldff1bss_r,
4676 gen_helper_sve_ldff1bhs_r,
4677 gen_helper_sve_ldff1dd_be_r } },
4679 { /* mte active, little-endian */
4680 { gen_helper_sve_ldff1bb_r_mte,
4681 gen_helper_sve_ldff1bhu_r_mte,
4682 gen_helper_sve_ldff1bsu_r_mte,
4683 gen_helper_sve_ldff1bdu_r_mte,
4685 gen_helper_sve_ldff1sds_le_r_mte,
4686 gen_helper_sve_ldff1hh_le_r_mte,
4687 gen_helper_sve_ldff1hsu_le_r_mte,
4688 gen_helper_sve_ldff1hdu_le_r_mte,
4690 gen_helper_sve_ldff1hds_le_r_mte,
4691 gen_helper_sve_ldff1hss_le_r_mte,
4692 gen_helper_sve_ldff1ss_le_r_mte,
4693 gen_helper_sve_ldff1sdu_le_r_mte,
4695 gen_helper_sve_ldff1bds_r_mte,
4696 gen_helper_sve_ldff1bss_r_mte,
4697 gen_helper_sve_ldff1bhs_r_mte,
4698 gen_helper_sve_ldff1dd_le_r_mte },
4700 /* mte active, big-endian */
4701 { gen_helper_sve_ldff1bb_r_mte,
4702 gen_helper_sve_ldff1bhu_r_mte,
4703 gen_helper_sve_ldff1bsu_r_mte,
4704 gen_helper_sve_ldff1bdu_r_mte,
4706 gen_helper_sve_ldff1sds_be_r_mte,
4707 gen_helper_sve_ldff1hh_be_r_mte,
4708 gen_helper_sve_ldff1hsu_be_r_mte,
4709 gen_helper_sve_ldff1hdu_be_r_mte,
4711 gen_helper_sve_ldff1hds_be_r_mte,
4712 gen_helper_sve_ldff1hss_be_r_mte,
4713 gen_helper_sve_ldff1ss_be_r_mte,
4714 gen_helper_sve_ldff1sdu_be_r_mte,
4716 gen_helper_sve_ldff1bds_r_mte,
4717 gen_helper_sve_ldff1bss_r_mte,
4718 gen_helper_sve_ldff1bhs_r_mte,
4719 gen_helper_sve_ldff1dd_be_r_mte } },
4722 if (sve_access_check(s)) {
4723 TCGv_i64 addr = new_tmp_a64(s);
4724 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4725 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4726 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4727 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4729 return true;
4732 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4734 static gen_helper_gvec_mem * const fns[2][2][16] = {
4735 { /* mte inactive, little-endian */
4736 { gen_helper_sve_ldnf1bb_r,
4737 gen_helper_sve_ldnf1bhu_r,
4738 gen_helper_sve_ldnf1bsu_r,
4739 gen_helper_sve_ldnf1bdu_r,
4741 gen_helper_sve_ldnf1sds_le_r,
4742 gen_helper_sve_ldnf1hh_le_r,
4743 gen_helper_sve_ldnf1hsu_le_r,
4744 gen_helper_sve_ldnf1hdu_le_r,
4746 gen_helper_sve_ldnf1hds_le_r,
4747 gen_helper_sve_ldnf1hss_le_r,
4748 gen_helper_sve_ldnf1ss_le_r,
4749 gen_helper_sve_ldnf1sdu_le_r,
4751 gen_helper_sve_ldnf1bds_r,
4752 gen_helper_sve_ldnf1bss_r,
4753 gen_helper_sve_ldnf1bhs_r,
4754 gen_helper_sve_ldnf1dd_le_r },
4756 /* mte inactive, big-endian */
4757 { gen_helper_sve_ldnf1bb_r,
4758 gen_helper_sve_ldnf1bhu_r,
4759 gen_helper_sve_ldnf1bsu_r,
4760 gen_helper_sve_ldnf1bdu_r,
4762 gen_helper_sve_ldnf1sds_be_r,
4763 gen_helper_sve_ldnf1hh_be_r,
4764 gen_helper_sve_ldnf1hsu_be_r,
4765 gen_helper_sve_ldnf1hdu_be_r,
4767 gen_helper_sve_ldnf1hds_be_r,
4768 gen_helper_sve_ldnf1hss_be_r,
4769 gen_helper_sve_ldnf1ss_be_r,
4770 gen_helper_sve_ldnf1sdu_be_r,
4772 gen_helper_sve_ldnf1bds_r,
4773 gen_helper_sve_ldnf1bss_r,
4774 gen_helper_sve_ldnf1bhs_r,
4775 gen_helper_sve_ldnf1dd_be_r } },
4777        { /* mte active, little-endian */
4778 { gen_helper_sve_ldnf1bb_r_mte,
4779 gen_helper_sve_ldnf1bhu_r_mte,
4780 gen_helper_sve_ldnf1bsu_r_mte,
4781 gen_helper_sve_ldnf1bdu_r_mte,
4783 gen_helper_sve_ldnf1sds_le_r_mte,
4784 gen_helper_sve_ldnf1hh_le_r_mte,
4785 gen_helper_sve_ldnf1hsu_le_r_mte,
4786 gen_helper_sve_ldnf1hdu_le_r_mte,
4788 gen_helper_sve_ldnf1hds_le_r_mte,
4789 gen_helper_sve_ldnf1hss_le_r_mte,
4790 gen_helper_sve_ldnf1ss_le_r_mte,
4791 gen_helper_sve_ldnf1sdu_le_r_mte,
4793 gen_helper_sve_ldnf1bds_r_mte,
4794 gen_helper_sve_ldnf1bss_r_mte,
4795 gen_helper_sve_ldnf1bhs_r_mte,
4796 gen_helper_sve_ldnf1dd_le_r_mte },
4798          /* mte active, big-endian */
4799 { gen_helper_sve_ldnf1bb_r_mte,
4800 gen_helper_sve_ldnf1bhu_r_mte,
4801 gen_helper_sve_ldnf1bsu_r_mte,
4802 gen_helper_sve_ldnf1bdu_r_mte,
4804 gen_helper_sve_ldnf1sds_be_r_mte,
4805 gen_helper_sve_ldnf1hh_be_r_mte,
4806 gen_helper_sve_ldnf1hsu_be_r_mte,
4807 gen_helper_sve_ldnf1hdu_be_r_mte,
4809 gen_helper_sve_ldnf1hds_be_r_mte,
4810 gen_helper_sve_ldnf1hss_be_r_mte,
4811 gen_helper_sve_ldnf1ss_be_r_mte,
4812 gen_helper_sve_ldnf1sdu_be_r_mte,
4814 gen_helper_sve_ldnf1bds_r_mte,
4815 gen_helper_sve_ldnf1bss_r_mte,
4816 gen_helper_sve_ldnf1bhs_r_mte,
4817 gen_helper_sve_ldnf1dd_be_r_mte } },
4820 if (sve_access_check(s)) {
4821 int vsz = vec_full_reg_size(s);
4822 int elements = vsz >> dtype_esz[a->dtype];
4823 int off = (a->imm * elements) << dtype_msz(a->dtype);
4824 TCGv_i64 addr = new_tmp_a64(s);
4826 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4827 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4828 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4830 return true;
4833 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4835 unsigned vsz = vec_full_reg_size(s);
4836 TCGv_ptr t_pg;
4837 int poff;
4839 /* Load the first quadword using the normal predicated load helpers. */
4840 poff = pred_full_reg_offset(s, pg);
4841 if (vsz > 16) {
4843 * Zero-extend the first 16 bits of the predicate into a temporary.
4844 * This avoids triggering an assert making sure we don't have bits
4845 * set within a predicate beyond VQ, but we have lowered VQ to 1
4846 * for this load operation.
4848 TCGv_i64 tmp = tcg_temp_new_i64();
4849 #if HOST_BIG_ENDIAN
4850 poff += 6;
4851 #endif
4852 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4854 poff = offsetof(CPUARMState, vfp.preg_tmp);
4855 tcg_gen_st_i64(tmp, cpu_env, poff);
4856 tcg_temp_free_i64(tmp);
4859 t_pg = tcg_temp_new_ptr();
4860 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4862 gen_helper_gvec_mem *fn
4863 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4864 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
4866 tcg_temp_free_ptr(t_pg);
4868 /* Replicate that first quadword. */
4869 if (vsz > 16) {
4870 int doff = vec_full_reg_offset(s, zt);
4871 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
4875 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4877 if (a->rm == 31) {
4878 return false;
4880 if (sve_access_check(s)) {
4881 int msz = dtype_msz(a->dtype);
4882 TCGv_i64 addr = new_tmp_a64(s);
4883 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4884 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4885 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4887 return true;
4890 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4892 if (sve_access_check(s)) {
4893 TCGv_i64 addr = new_tmp_a64(s);
4894 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4895 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4897 return true;
4900 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4902 unsigned vsz = vec_full_reg_size(s);
4903 unsigned vsz_r32;
4904 TCGv_ptr t_pg;
4905 int poff, doff;
4907 if (vsz < 32) {
4909 * Note that this UNDEFINED check comes after CheckSVEEnabled()
4910 * in the ARM pseudocode, which is the sve_access_check() done
4911 * in our caller. We should not now return false from the caller.
4913 unallocated_encoding(s);
4914 return;
4917 /* Load the first octaword using the normal predicated load helpers. */
4919 poff = pred_full_reg_offset(s, pg);
4920 if (vsz > 32) {
4922 * Zero-extend the first 32 bits of the predicate into a temporary.
4923 * This avoids triggering an assert making sure we don't have bits
4924 * set within a predicate beyond VQ, but we have lowered VQ to 2
4925 * for this load operation.
4927 TCGv_i64 tmp = tcg_temp_new_i64();
4928 #if HOST_BIG_ENDIAN
4929 poff += 4;
4930 #endif
4931 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
4933 poff = offsetof(CPUARMState, vfp.preg_tmp);
4934 tcg_gen_st_i64(tmp, cpu_env, poff);
4935 tcg_temp_free_i64(tmp);
4938 t_pg = tcg_temp_new_ptr();
4939 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4941 gen_helper_gvec_mem *fn
4942 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4943 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
4945 tcg_temp_free_ptr(t_pg);
4948 * Replicate that first octaword.
4949 * The replication happens in units of 32; if the full vector size
4950 * is not a multiple of 32, the final bits are zeroed.
4952 doff = vec_full_reg_offset(s, zt);
4953 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
4954 if (vsz >= 64) {
4955 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
4957 vsz -= vsz_r32;
4958 if (vsz) {
4959 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
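/*
 * Illustrative example of the replication above: with vsz = 48,
 * vsz_r32 = 32, so nothing is replicated and the final 16 bytes are
 * zeroed; with vsz = 80, bytes [32, 64) receive a copy of the
 * octaword and the final 16 bytes are zeroed.
 */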
4963 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4965 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4966 return false;
4968 if (a->rm == 31) {
4969 return false;
4971 if (sve_access_check(s)) {
4972 TCGv_i64 addr = new_tmp_a64(s);
4973 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4974 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4975 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4977 return true;
4980 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
4982 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4983 return false;
4985 if (sve_access_check(s)) {
4986 TCGv_i64 addr = new_tmp_a64(s);
4987 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
4988 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4990 return true;
4993 /* Load and broadcast element. */
4994 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4996 unsigned vsz = vec_full_reg_size(s);
4997 unsigned psz = pred_full_reg_size(s);
4998 unsigned esz = dtype_esz[a->dtype];
4999 unsigned msz = dtype_msz(a->dtype);
5000 TCGLabel *over;
5001 TCGv_i64 temp, clean_addr;
5003 if (!sve_access_check(s)) {
5004 return true;
5007 over = gen_new_label();
5009 /* If the guarding predicate has no bits set, no load occurs. */
5010 if (psz <= 8) {
5011 /* Reduce the pred_esz_masks value simply to reduce the
5012 * size of the code generated here.
5014 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5015 temp = tcg_temp_new_i64();
5016 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5017 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5018 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5019 tcg_temp_free_i64(temp);
5020 } else {
5021 TCGv_i32 t32 = tcg_temp_new_i32();
5022 find_last_active(s, t32, esz, a->pg);
5023 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5024 tcg_temp_free_i32(t32);
5027 /* Load the data. */
5028 temp = tcg_temp_new_i64();
5029 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5030 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5032 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
5033 finalize_memop(s, dtype_mop[a->dtype]));
5035 /* Broadcast to *all* elements. */
5036 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5037 vsz, vsz, temp);
5038 tcg_temp_free_i64(temp);
5040 /* Zero the inactive elements. */
5041 gen_set_label(over);
5042 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
5045 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5046 int msz, int esz, int nreg)
5048 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5049 { { { gen_helper_sve_st1bb_r,
5050 gen_helper_sve_st1bh_r,
5051 gen_helper_sve_st1bs_r,
5052 gen_helper_sve_st1bd_r },
5053 { NULL,
5054 gen_helper_sve_st1hh_le_r,
5055 gen_helper_sve_st1hs_le_r,
5056 gen_helper_sve_st1hd_le_r },
5057 { NULL, NULL,
5058 gen_helper_sve_st1ss_le_r,
5059 gen_helper_sve_st1sd_le_r },
5060 { NULL, NULL, NULL,
5061 gen_helper_sve_st1dd_le_r } },
5062 { { gen_helper_sve_st1bb_r,
5063 gen_helper_sve_st1bh_r,
5064 gen_helper_sve_st1bs_r,
5065 gen_helper_sve_st1bd_r },
5066 { NULL,
5067 gen_helper_sve_st1hh_be_r,
5068 gen_helper_sve_st1hs_be_r,
5069 gen_helper_sve_st1hd_be_r },
5070 { NULL, NULL,
5071 gen_helper_sve_st1ss_be_r,
5072 gen_helper_sve_st1sd_be_r },
5073 { NULL, NULL, NULL,
5074 gen_helper_sve_st1dd_be_r } } },
5076 { { { gen_helper_sve_st1bb_r_mte,
5077 gen_helper_sve_st1bh_r_mte,
5078 gen_helper_sve_st1bs_r_mte,
5079 gen_helper_sve_st1bd_r_mte },
5080 { NULL,
5081 gen_helper_sve_st1hh_le_r_mte,
5082 gen_helper_sve_st1hs_le_r_mte,
5083 gen_helper_sve_st1hd_le_r_mte },
5084 { NULL, NULL,
5085 gen_helper_sve_st1ss_le_r_mte,
5086 gen_helper_sve_st1sd_le_r_mte },
5087 { NULL, NULL, NULL,
5088 gen_helper_sve_st1dd_le_r_mte } },
5089 { { gen_helper_sve_st1bb_r_mte,
5090 gen_helper_sve_st1bh_r_mte,
5091 gen_helper_sve_st1bs_r_mte,
5092 gen_helper_sve_st1bd_r_mte },
5093 { NULL,
5094 gen_helper_sve_st1hh_be_r_mte,
5095 gen_helper_sve_st1hs_be_r_mte,
5096 gen_helper_sve_st1hd_be_r_mte },
5097 { NULL, NULL,
5098 gen_helper_sve_st1ss_be_r_mte,
5099 gen_helper_sve_st1sd_be_r_mte },
5100 { NULL, NULL, NULL,
5101 gen_helper_sve_st1dd_be_r_mte } } },
5103 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5104 { { { gen_helper_sve_st2bb_r,
5105 gen_helper_sve_st2hh_le_r,
5106 gen_helper_sve_st2ss_le_r,
5107 gen_helper_sve_st2dd_le_r },
5108 { gen_helper_sve_st3bb_r,
5109 gen_helper_sve_st3hh_le_r,
5110 gen_helper_sve_st3ss_le_r,
5111 gen_helper_sve_st3dd_le_r },
5112 { gen_helper_sve_st4bb_r,
5113 gen_helper_sve_st4hh_le_r,
5114 gen_helper_sve_st4ss_le_r,
5115 gen_helper_sve_st4dd_le_r } },
5116 { { gen_helper_sve_st2bb_r,
5117 gen_helper_sve_st2hh_be_r,
5118 gen_helper_sve_st2ss_be_r,
5119 gen_helper_sve_st2dd_be_r },
5120 { gen_helper_sve_st3bb_r,
5121 gen_helper_sve_st3hh_be_r,
5122 gen_helper_sve_st3ss_be_r,
5123 gen_helper_sve_st3dd_be_r },
5124 { gen_helper_sve_st4bb_r,
5125 gen_helper_sve_st4hh_be_r,
5126 gen_helper_sve_st4ss_be_r,
5127 gen_helper_sve_st4dd_be_r } } },
5128 { { { gen_helper_sve_st2bb_r_mte,
5129 gen_helper_sve_st2hh_le_r_mte,
5130 gen_helper_sve_st2ss_le_r_mte,
5131 gen_helper_sve_st2dd_le_r_mte },
5132 { gen_helper_sve_st3bb_r_mte,
5133 gen_helper_sve_st3hh_le_r_mte,
5134 gen_helper_sve_st3ss_le_r_mte,
5135 gen_helper_sve_st3dd_le_r_mte },
5136 { gen_helper_sve_st4bb_r_mte,
5137 gen_helper_sve_st4hh_le_r_mte,
5138 gen_helper_sve_st4ss_le_r_mte,
5139 gen_helper_sve_st4dd_le_r_mte } },
5140 { { gen_helper_sve_st2bb_r_mte,
5141 gen_helper_sve_st2hh_be_r_mte,
5142 gen_helper_sve_st2ss_be_r_mte,
5143 gen_helper_sve_st2dd_be_r_mte },
5144 { gen_helper_sve_st3bb_r_mte,
5145 gen_helper_sve_st3hh_be_r_mte,
5146 gen_helper_sve_st3ss_be_r_mte,
5147 gen_helper_sve_st3dd_be_r_mte },
5148 { gen_helper_sve_st4bb_r_mte,
5149 gen_helper_sve_st4hh_be_r_mte,
5150 gen_helper_sve_st4ss_be_r_mte,
5151 gen_helper_sve_st4dd_be_r_mte } } },
5153 gen_helper_gvec_mem *fn;
5154 int be = s->be_data == MO_BE;
5156 if (nreg == 0) {
5157 /* ST1 */
5158 fn = fn_single[s->mte_active[0]][be][msz][esz];
5159 nreg = 1;
5160 } else {
5161 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5162 assert(msz == esz);
5163 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
5165 assert(fn != NULL);
5166 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
5169 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5171 if (a->rm == 31 || a->msz > a->esz) {
5172 return false;
5174 if (sve_access_check(s)) {
5175 TCGv_i64 addr = new_tmp_a64(s);
5176 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5177 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5178 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5180 return true;
5183 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5185 if (a->msz > a->esz) {
5186 return false;
5188 if (sve_access_check(s)) {
5189 int vsz = vec_full_reg_size(s);
5190 int elements = vsz >> a->esz;
5191 TCGv_i64 addr = new_tmp_a64(s);
5193 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5194 (a->imm * elements * (a->nreg + 1)) << a->msz);
5195 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5197 return true;
5201 *** SVE gather loads / scatter stores
5204 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5205 int scale, TCGv_i64 scalar, int msz, bool is_write,
5206 gen_helper_gvec_mem_scatter *fn)
5208 unsigned vsz = vec_full_reg_size(s);
5209 TCGv_ptr t_zm = tcg_temp_new_ptr();
5210 TCGv_ptr t_pg = tcg_temp_new_ptr();
5211 TCGv_ptr t_zt = tcg_temp_new_ptr();
5212 int desc = 0;
5214 if (s->mte_active[0]) {
5215 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5216 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5217 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5218 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5219 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
5220 desc <<= SVE_MTEDESC_SHIFT;
5222 desc = simd_desc(vsz, vsz, desc | scale);
5224 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5225 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5226 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5227 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
5229 tcg_temp_free_ptr(t_zt);
5230 tcg_temp_free_ptr(t_zm);
5231 tcg_temp_free_ptr(t_pg);
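/*
 * As in do_mem_zpa, the simd_desc data field does double duty
 * (illustrative): its low bits carry the offset scale and, when MTE
 * is active, the MTE descriptor sits above SVE_MTEDESC_SHIFT.
 */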
5234 /* Indexed by [mte][be][ff][xs][u][msz]. */
5235 static gen_helper_gvec_mem_scatter * const
5236 gather_load_fn32[2][2][2][2][2][3] = {
5237 { /* MTE Inactive */
5238 { /* Little-endian */
5239 { { { gen_helper_sve_ldbss_zsu,
5240 gen_helper_sve_ldhss_le_zsu,
5241 NULL, },
5242 { gen_helper_sve_ldbsu_zsu,
5243 gen_helper_sve_ldhsu_le_zsu,
5244 gen_helper_sve_ldss_le_zsu, } },
5245 { { gen_helper_sve_ldbss_zss,
5246 gen_helper_sve_ldhss_le_zss,
5247 NULL, },
5248 { gen_helper_sve_ldbsu_zss,
5249 gen_helper_sve_ldhsu_le_zss,
5250 gen_helper_sve_ldss_le_zss, } } },
5252 /* First-fault */
5253 { { { gen_helper_sve_ldffbss_zsu,
5254 gen_helper_sve_ldffhss_le_zsu,
5255 NULL, },
5256 { gen_helper_sve_ldffbsu_zsu,
5257 gen_helper_sve_ldffhsu_le_zsu,
5258 gen_helper_sve_ldffss_le_zsu, } },
5259 { { gen_helper_sve_ldffbss_zss,
5260 gen_helper_sve_ldffhss_le_zss,
5261 NULL, },
5262 { gen_helper_sve_ldffbsu_zss,
5263 gen_helper_sve_ldffhsu_le_zss,
5264 gen_helper_sve_ldffss_le_zss, } } } },
5266 { /* Big-endian */
5267 { { { gen_helper_sve_ldbss_zsu,
5268 gen_helper_sve_ldhss_be_zsu,
5269 NULL, },
5270 { gen_helper_sve_ldbsu_zsu,
5271 gen_helper_sve_ldhsu_be_zsu,
5272 gen_helper_sve_ldss_be_zsu, } },
5273 { { gen_helper_sve_ldbss_zss,
5274 gen_helper_sve_ldhss_be_zss,
5275 NULL, },
5276 { gen_helper_sve_ldbsu_zss,
5277 gen_helper_sve_ldhsu_be_zss,
5278 gen_helper_sve_ldss_be_zss, } } },
5280 /* First-fault */
5281 { { { gen_helper_sve_ldffbss_zsu,
5282 gen_helper_sve_ldffhss_be_zsu,
5283 NULL, },
5284 { gen_helper_sve_ldffbsu_zsu,
5285 gen_helper_sve_ldffhsu_be_zsu,
5286 gen_helper_sve_ldffss_be_zsu, } },
5287 { { gen_helper_sve_ldffbss_zss,
5288 gen_helper_sve_ldffhss_be_zss,
5289 NULL, },
5290 { gen_helper_sve_ldffbsu_zss,
5291 gen_helper_sve_ldffhsu_be_zss,
5292 gen_helper_sve_ldffss_be_zss, } } } } },
5293 { /* MTE Active */
5294 { /* Little-endian */
5295 { { { gen_helper_sve_ldbss_zsu_mte,
5296 gen_helper_sve_ldhss_le_zsu_mte,
5297 NULL, },
5298 { gen_helper_sve_ldbsu_zsu_mte,
5299 gen_helper_sve_ldhsu_le_zsu_mte,
5300 gen_helper_sve_ldss_le_zsu_mte, } },
5301 { { gen_helper_sve_ldbss_zss_mte,
5302 gen_helper_sve_ldhss_le_zss_mte,
5303 NULL, },
5304 { gen_helper_sve_ldbsu_zss_mte,
5305 gen_helper_sve_ldhsu_le_zss_mte,
5306 gen_helper_sve_ldss_le_zss_mte, } } },
5308 /* First-fault */
5309 { { { gen_helper_sve_ldffbss_zsu_mte,
5310 gen_helper_sve_ldffhss_le_zsu_mte,
5311 NULL, },
5312 { gen_helper_sve_ldffbsu_zsu_mte,
5313 gen_helper_sve_ldffhsu_le_zsu_mte,
5314 gen_helper_sve_ldffss_le_zsu_mte, } },
5315 { { gen_helper_sve_ldffbss_zss_mte,
5316 gen_helper_sve_ldffhss_le_zss_mte,
5317 NULL, },
5318 { gen_helper_sve_ldffbsu_zss_mte,
5319 gen_helper_sve_ldffhsu_le_zss_mte,
5320 gen_helper_sve_ldffss_le_zss_mte, } } } },
5322 { /* Big-endian */
5323 { { { gen_helper_sve_ldbss_zsu_mte,
5324 gen_helper_sve_ldhss_be_zsu_mte,
5325 NULL, },
5326 { gen_helper_sve_ldbsu_zsu_mte,
5327 gen_helper_sve_ldhsu_be_zsu_mte,
5328 gen_helper_sve_ldss_be_zsu_mte, } },
5329 { { gen_helper_sve_ldbss_zss_mte,
5330 gen_helper_sve_ldhss_be_zss_mte,
5331 NULL, },
5332 { gen_helper_sve_ldbsu_zss_mte,
5333 gen_helper_sve_ldhsu_be_zss_mte,
5334 gen_helper_sve_ldss_be_zss_mte, } } },
5336 /* First-fault */
5337 { { { gen_helper_sve_ldffbss_zsu_mte,
5338 gen_helper_sve_ldffhss_be_zsu_mte,
5339 NULL, },
5340 { gen_helper_sve_ldffbsu_zsu_mte,
5341 gen_helper_sve_ldffhsu_be_zsu_mte,
5342 gen_helper_sve_ldffss_be_zsu_mte, } },
5343 { { gen_helper_sve_ldffbss_zss_mte,
5344 gen_helper_sve_ldffhss_be_zss_mte,
5345 NULL, },
5346 { gen_helper_sve_ldffbsu_zss_mte,
5347 gen_helper_sve_ldffhsu_be_zss_mte,
5348 gen_helper_sve_ldffss_be_zss_mte, } } } } },
5351 /* Note that we overload xs=2 to indicate 64-bit offset. */
5352 static gen_helper_gvec_mem_scatter * const
5353 gather_load_fn64[2][2][2][3][2][4] = {
5354 { /* MTE Inactive */
5355 { /* Little-endian */
5356 { { { gen_helper_sve_ldbds_zsu,
5357 gen_helper_sve_ldhds_le_zsu,
5358 gen_helper_sve_ldsds_le_zsu,
5359 NULL, },
5360 { gen_helper_sve_ldbdu_zsu,
5361 gen_helper_sve_ldhdu_le_zsu,
5362 gen_helper_sve_ldsdu_le_zsu,
5363 gen_helper_sve_lddd_le_zsu, } },
5364 { { gen_helper_sve_ldbds_zss,
5365 gen_helper_sve_ldhds_le_zss,
5366 gen_helper_sve_ldsds_le_zss,
5367 NULL, },
5368 { gen_helper_sve_ldbdu_zss,
5369 gen_helper_sve_ldhdu_le_zss,
5370 gen_helper_sve_ldsdu_le_zss,
5371 gen_helper_sve_lddd_le_zss, } },
5372 { { gen_helper_sve_ldbds_zd,
5373 gen_helper_sve_ldhds_le_zd,
5374 gen_helper_sve_ldsds_le_zd,
5375 NULL, },
5376 { gen_helper_sve_ldbdu_zd,
5377 gen_helper_sve_ldhdu_le_zd,
5378 gen_helper_sve_ldsdu_le_zd,
5379 gen_helper_sve_lddd_le_zd, } } },
5381 /* First-fault */
5382 { { { gen_helper_sve_ldffbds_zsu,
5383 gen_helper_sve_ldffhds_le_zsu,
5384 gen_helper_sve_ldffsds_le_zsu,
5385 NULL, },
5386 { gen_helper_sve_ldffbdu_zsu,
5387 gen_helper_sve_ldffhdu_le_zsu,
5388 gen_helper_sve_ldffsdu_le_zsu,
5389 gen_helper_sve_ldffdd_le_zsu, } },
5390 { { gen_helper_sve_ldffbds_zss,
5391 gen_helper_sve_ldffhds_le_zss,
5392 gen_helper_sve_ldffsds_le_zss,
5393 NULL, },
5394 { gen_helper_sve_ldffbdu_zss,
5395 gen_helper_sve_ldffhdu_le_zss,
5396 gen_helper_sve_ldffsdu_le_zss,
5397 gen_helper_sve_ldffdd_le_zss, } },
5398 { { gen_helper_sve_ldffbds_zd,
5399 gen_helper_sve_ldffhds_le_zd,
5400 gen_helper_sve_ldffsds_le_zd,
5401 NULL, },
5402 { gen_helper_sve_ldffbdu_zd,
5403 gen_helper_sve_ldffhdu_le_zd,
5404 gen_helper_sve_ldffsdu_le_zd,
5405 gen_helper_sve_ldffdd_le_zd, } } } },
5406 { /* Big-endian */
5407 { { { gen_helper_sve_ldbds_zsu,
5408 gen_helper_sve_ldhds_be_zsu,
5409 gen_helper_sve_ldsds_be_zsu,
5410 NULL, },
5411 { gen_helper_sve_ldbdu_zsu,
5412 gen_helper_sve_ldhdu_be_zsu,
5413 gen_helper_sve_ldsdu_be_zsu,
5414 gen_helper_sve_lddd_be_zsu, } },
5415 { { gen_helper_sve_ldbds_zss,
5416 gen_helper_sve_ldhds_be_zss,
5417 gen_helper_sve_ldsds_be_zss,
5418 NULL, },
5419 { gen_helper_sve_ldbdu_zss,
5420 gen_helper_sve_ldhdu_be_zss,
5421 gen_helper_sve_ldsdu_be_zss,
5422 gen_helper_sve_lddd_be_zss, } },
5423 { { gen_helper_sve_ldbds_zd,
5424 gen_helper_sve_ldhds_be_zd,
5425 gen_helper_sve_ldsds_be_zd,
5426 NULL, },
5427 { gen_helper_sve_ldbdu_zd,
5428 gen_helper_sve_ldhdu_be_zd,
5429 gen_helper_sve_ldsdu_be_zd,
5430 gen_helper_sve_lddd_be_zd, } } },
5432 /* First-fault */
5433 { { { gen_helper_sve_ldffbds_zsu,
5434 gen_helper_sve_ldffhds_be_zsu,
5435 gen_helper_sve_ldffsds_be_zsu,
5436 NULL, },
5437 { gen_helper_sve_ldffbdu_zsu,
5438 gen_helper_sve_ldffhdu_be_zsu,
5439 gen_helper_sve_ldffsdu_be_zsu,
5440 gen_helper_sve_ldffdd_be_zsu, } },
5441 { { gen_helper_sve_ldffbds_zss,
5442 gen_helper_sve_ldffhds_be_zss,
5443 gen_helper_sve_ldffsds_be_zss,
5444 NULL, },
5445 { gen_helper_sve_ldffbdu_zss,
5446 gen_helper_sve_ldffhdu_be_zss,
5447 gen_helper_sve_ldffsdu_be_zss,
5448 gen_helper_sve_ldffdd_be_zss, } },
5449 { { gen_helper_sve_ldffbds_zd,
5450 gen_helper_sve_ldffhds_be_zd,
5451 gen_helper_sve_ldffsds_be_zd,
5452 NULL, },
5453 { gen_helper_sve_ldffbdu_zd,
5454 gen_helper_sve_ldffhdu_be_zd,
5455 gen_helper_sve_ldffsdu_be_zd,
5456 gen_helper_sve_ldffdd_be_zd, } } } } },
5457 { /* MTE Active */
5458 { /* Little-endian */
5459 { { { gen_helper_sve_ldbds_zsu_mte,
5460 gen_helper_sve_ldhds_le_zsu_mte,
5461 gen_helper_sve_ldsds_le_zsu_mte,
5462 NULL, },
5463 { gen_helper_sve_ldbdu_zsu_mte,
5464 gen_helper_sve_ldhdu_le_zsu_mte,
5465 gen_helper_sve_ldsdu_le_zsu_mte,
5466 gen_helper_sve_lddd_le_zsu_mte, } },
5467 { { gen_helper_sve_ldbds_zss_mte,
5468 gen_helper_sve_ldhds_le_zss_mte,
5469 gen_helper_sve_ldsds_le_zss_mte,
5470 NULL, },
5471 { gen_helper_sve_ldbdu_zss_mte,
5472 gen_helper_sve_ldhdu_le_zss_mte,
5473 gen_helper_sve_ldsdu_le_zss_mte,
5474 gen_helper_sve_lddd_le_zss_mte, } },
5475 { { gen_helper_sve_ldbds_zd_mte,
5476 gen_helper_sve_ldhds_le_zd_mte,
5477 gen_helper_sve_ldsds_le_zd_mte,
5478 NULL, },
5479 { gen_helper_sve_ldbdu_zd_mte,
5480 gen_helper_sve_ldhdu_le_zd_mte,
5481 gen_helper_sve_ldsdu_le_zd_mte,
5482 gen_helper_sve_lddd_le_zd_mte, } } },
5484 /* First-fault */
5485 { { { gen_helper_sve_ldffbds_zsu_mte,
5486 gen_helper_sve_ldffhds_le_zsu_mte,
5487 gen_helper_sve_ldffsds_le_zsu_mte,
5488 NULL, },
5489 { gen_helper_sve_ldffbdu_zsu_mte,
5490 gen_helper_sve_ldffhdu_le_zsu_mte,
5491 gen_helper_sve_ldffsdu_le_zsu_mte,
5492 gen_helper_sve_ldffdd_le_zsu_mte, } },
5493 { { gen_helper_sve_ldffbds_zss_mte,
5494 gen_helper_sve_ldffhds_le_zss_mte,
5495 gen_helper_sve_ldffsds_le_zss_mte,
5496 NULL, },
5497 { gen_helper_sve_ldffbdu_zss_mte,
5498 gen_helper_sve_ldffhdu_le_zss_mte,
5499 gen_helper_sve_ldffsdu_le_zss_mte,
5500 gen_helper_sve_ldffdd_le_zss_mte, } },
5501 { { gen_helper_sve_ldffbds_zd_mte,
5502 gen_helper_sve_ldffhds_le_zd_mte,
5503 gen_helper_sve_ldffsds_le_zd_mte,
5504 NULL, },
5505 { gen_helper_sve_ldffbdu_zd_mte,
5506 gen_helper_sve_ldffhdu_le_zd_mte,
5507 gen_helper_sve_ldffsdu_le_zd_mte,
5508 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5509 { /* Big-endian */
5510 { { { gen_helper_sve_ldbds_zsu_mte,
5511 gen_helper_sve_ldhds_be_zsu_mte,
5512 gen_helper_sve_ldsds_be_zsu_mte,
5513 NULL, },
5514 { gen_helper_sve_ldbdu_zsu_mte,
5515 gen_helper_sve_ldhdu_be_zsu_mte,
5516 gen_helper_sve_ldsdu_be_zsu_mte,
5517 gen_helper_sve_lddd_be_zsu_mte, } },
5518 { { gen_helper_sve_ldbds_zss_mte,
5519 gen_helper_sve_ldhds_be_zss_mte,
5520 gen_helper_sve_ldsds_be_zss_mte,
5521 NULL, },
5522 { gen_helper_sve_ldbdu_zss_mte,
5523 gen_helper_sve_ldhdu_be_zss_mte,
5524 gen_helper_sve_ldsdu_be_zss_mte,
5525 gen_helper_sve_lddd_be_zss_mte, } },
5526 { { gen_helper_sve_ldbds_zd_mte,
5527 gen_helper_sve_ldhds_be_zd_mte,
5528 gen_helper_sve_ldsds_be_zd_mte,
5529 NULL, },
5530 { gen_helper_sve_ldbdu_zd_mte,
5531 gen_helper_sve_ldhdu_be_zd_mte,
5532 gen_helper_sve_ldsdu_be_zd_mte,
5533 gen_helper_sve_lddd_be_zd_mte, } } },
5535 /* First-fault */
5536 { { { gen_helper_sve_ldffbds_zsu_mte,
5537 gen_helper_sve_ldffhds_be_zsu_mte,
5538 gen_helper_sve_ldffsds_be_zsu_mte,
5539 NULL, },
5540 { gen_helper_sve_ldffbdu_zsu_mte,
5541 gen_helper_sve_ldffhdu_be_zsu_mte,
5542 gen_helper_sve_ldffsdu_be_zsu_mte,
5543 gen_helper_sve_ldffdd_be_zsu_mte, } },
5544 { { gen_helper_sve_ldffbds_zss_mte,
5545 gen_helper_sve_ldffhds_be_zss_mte,
5546 gen_helper_sve_ldffsds_be_zss_mte,
5547 NULL, },
5548 { gen_helper_sve_ldffbdu_zss_mte,
5549 gen_helper_sve_ldffhdu_be_zss_mte,
5550 gen_helper_sve_ldffsdu_be_zss_mte,
5551 gen_helper_sve_ldffdd_be_zss_mte, } },
5552 { { gen_helper_sve_ldffbds_zd_mte,
5553 gen_helper_sve_ldffhds_be_zd_mte,
5554 gen_helper_sve_ldffsds_be_zd_mte,
5555 NULL, },
5556 { gen_helper_sve_ldffbdu_zd_mte,
5557 gen_helper_sve_ldffhdu_be_zd_mte,
5558 gen_helper_sve_ldffsdu_be_zd_mte,
5559 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5562 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5564 gen_helper_gvec_mem_scatter *fn = NULL;
5565 bool be = s->be_data == MO_BE;
5566 bool mte = s->mte_active[0];
5568 if (!sve_access_check(s)) {
5569 return true;
5572 switch (a->esz) {
5573 case MO_32:
5574 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
5575 break;
5576 case MO_64:
5577 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
5578 break;
5580 assert(fn != NULL);
5582 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5583 cpu_reg_sp(s, a->rn), a->msz, false, fn);
5584 return true;
5587 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5589 gen_helper_gvec_mem_scatter *fn = NULL;
5590 bool be = s->be_data == MO_BE;
5591 bool mte = s->mte_active[0];
5593 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5594 return false;
5596 if (!sve_access_check(s)) {
5597 return true;
5600 switch (a->esz) {
5601 case MO_32:
5602 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
5603 break;
5604 case MO_64:
5605 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
5606 break;
5608 assert(fn != NULL);
5610 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5611 * by loading the immediate into the scalar parameter.
5613 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5614 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
5615 return true;
5618 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5620 gen_helper_gvec_mem_scatter *fn = NULL;
5621 bool be = s->be_data == MO_BE;
5622 bool mte = s->mte_active[0];
5624 if (a->esz < a->msz + !a->u) {
5625 return false;
5627 if (!dc_isar_feature(aa64_sve2, s)) {
5628 return false;
5630 if (!sve_access_check(s)) {
5631 return true;
5634 switch (a->esz) {
5635 case MO_32:
5636 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5637 break;
5638 case MO_64:
5639 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5640 break;
5642 assert(fn != NULL);
5644 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5645 cpu_reg(s, a->rm), a->msz, false, fn);
5646 return true;
5649 /* Indexed by [mte][be][xs][msz]. */
5650 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5651 { /* MTE Inactive */
5652 { /* Little-endian */
5653 { gen_helper_sve_stbs_zsu,
5654 gen_helper_sve_sths_le_zsu,
5655 gen_helper_sve_stss_le_zsu, },
5656 { gen_helper_sve_stbs_zss,
5657 gen_helper_sve_sths_le_zss,
5658 gen_helper_sve_stss_le_zss, } },
5659 { /* Big-endian */
5660 { gen_helper_sve_stbs_zsu,
5661 gen_helper_sve_sths_be_zsu,
5662 gen_helper_sve_stss_be_zsu, },
5663 { gen_helper_sve_stbs_zss,
5664 gen_helper_sve_sths_be_zss,
5665 gen_helper_sve_stss_be_zss, } } },
5666 { /* MTE Active */
5667 { /* Little-endian */
5668 { gen_helper_sve_stbs_zsu_mte,
5669 gen_helper_sve_sths_le_zsu_mte,
5670 gen_helper_sve_stss_le_zsu_mte, },
5671 { gen_helper_sve_stbs_zss_mte,
5672 gen_helper_sve_sths_le_zss_mte,
5673 gen_helper_sve_stss_le_zss_mte, } },
5674 { /* Big-endian */
5675 { gen_helper_sve_stbs_zsu_mte,
5676 gen_helper_sve_sths_be_zsu_mte,
5677 gen_helper_sve_stss_be_zsu_mte, },
5678 { gen_helper_sve_stbs_zss_mte,
5679 gen_helper_sve_sths_be_zss_mte,
5680 gen_helper_sve_stss_be_zss_mte, } } },
5683 /* Note that we overload xs=2 to indicate 64-bit offset. */
5684 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5685 { /* MTE Inactive */
5686 { /* Little-endian */
5687 { gen_helper_sve_stbd_zsu,
5688 gen_helper_sve_sthd_le_zsu,
5689 gen_helper_sve_stsd_le_zsu,
5690 gen_helper_sve_stdd_le_zsu, },
5691 { gen_helper_sve_stbd_zss,
5692 gen_helper_sve_sthd_le_zss,
5693 gen_helper_sve_stsd_le_zss,
5694 gen_helper_sve_stdd_le_zss, },
5695 { gen_helper_sve_stbd_zd,
5696 gen_helper_sve_sthd_le_zd,
5697 gen_helper_sve_stsd_le_zd,
5698 gen_helper_sve_stdd_le_zd, } },
5699 { /* Big-endian */
5700 { gen_helper_sve_stbd_zsu,
5701 gen_helper_sve_sthd_be_zsu,
5702 gen_helper_sve_stsd_be_zsu,
5703 gen_helper_sve_stdd_be_zsu, },
5704 { gen_helper_sve_stbd_zss,
5705 gen_helper_sve_sthd_be_zss,
5706 gen_helper_sve_stsd_be_zss,
5707 gen_helper_sve_stdd_be_zss, },
5708 { gen_helper_sve_stbd_zd,
5709 gen_helper_sve_sthd_be_zd,
5710 gen_helper_sve_stsd_be_zd,
5711 gen_helper_sve_stdd_be_zd, } } },
5712    { /* MTE Active */
5713 { /* Little-endian */
5714 { gen_helper_sve_stbd_zsu_mte,
5715 gen_helper_sve_sthd_le_zsu_mte,
5716 gen_helper_sve_stsd_le_zsu_mte,
5717 gen_helper_sve_stdd_le_zsu_mte, },
5718 { gen_helper_sve_stbd_zss_mte,
5719 gen_helper_sve_sthd_le_zss_mte,
5720 gen_helper_sve_stsd_le_zss_mte,
5721 gen_helper_sve_stdd_le_zss_mte, },
5722 { gen_helper_sve_stbd_zd_mte,
5723 gen_helper_sve_sthd_le_zd_mte,
5724 gen_helper_sve_stsd_le_zd_mte,
5725 gen_helper_sve_stdd_le_zd_mte, } },
5726 { /* Big-endian */
5727 { gen_helper_sve_stbd_zsu_mte,
5728 gen_helper_sve_sthd_be_zsu_mte,
5729 gen_helper_sve_stsd_be_zsu_mte,
5730 gen_helper_sve_stdd_be_zsu_mte, },
5731 { gen_helper_sve_stbd_zss_mte,
5732 gen_helper_sve_sthd_be_zss_mte,
5733 gen_helper_sve_stsd_be_zss_mte,
5734 gen_helper_sve_stdd_be_zss_mte, },
5735 { gen_helper_sve_stbd_zd_mte,
5736 gen_helper_sve_sthd_be_zd_mte,
5737 gen_helper_sve_stsd_be_zd_mte,
5738 gen_helper_sve_stdd_be_zd_mte, } } },
5741 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5743 gen_helper_gvec_mem_scatter *fn;
5744 bool be = s->be_data == MO_BE;
5745 bool mte = s->mte_active[0];
5747 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5748 return false;
5750 if (!sve_access_check(s)) {
5751 return true;
5753 switch (a->esz) {
5754 case MO_32:
5755 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
5756 break;
5757 case MO_64:
5758 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
5759 break;
5760 default:
5761 g_assert_not_reached();
5763 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5764 cpu_reg_sp(s, a->rn), a->msz, true, fn);
5765 return true;
5768 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5770 gen_helper_gvec_mem_scatter *fn = NULL;
5771 bool be = s->be_data == MO_BE;
5772 bool mte = s->mte_active[0];
5774 if (a->esz < a->msz) {
5775 return false;
5777 if (!sve_access_check(s)) {
5778 return true;
5781 switch (a->esz) {
5782 case MO_32:
5783 fn = scatter_store_fn32[mte][be][0][a->msz];
5784 break;
5785 case MO_64:
5786 fn = scatter_store_fn64[mte][be][2][a->msz];
5787 break;
5789 assert(fn != NULL);
5791 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5792 * by loading the immediate into the scalar parameter.
5794 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5795 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
5796 return true;
5799 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
5801 gen_helper_gvec_mem_scatter *fn;
5802 bool be = s->be_data == MO_BE;
5803 bool mte = s->mte_active[0];
5805 if (a->esz < a->msz) {
5806 return false;
5808 if (!dc_isar_feature(aa64_sve2, s)) {
5809 return false;
5811 if (!sve_access_check(s)) {
5812 return true;
5815 switch (a->esz) {
5816 case MO_32:
5817 fn = scatter_store_fn32[mte][be][0][a->msz];
5818 break;
5819 case MO_64:
5820 fn = scatter_store_fn64[mte][be][2][a->msz];
5821 break;
5822 default:
5823 g_assert_not_reached();
5826 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5827 cpu_reg(s, a->rm), a->msz, true, fn);
5828 return true;
5832 * Prefetches
5835 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5837 /* Prefetch is a nop within QEMU. */
5838 (void)sve_access_check(s);
5839 return true;
5842 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5844 if (a->rm == 31) {
5845 return false;
5847 /* Prefetch is a nop within QEMU. */
5848 (void)sve_access_check(s);
5849 return true;
5853 * Move Prefix
5855 * TODO: The implementation so far could handle predicated merging movprfx.
5856 * The helper functions as written take an extra source register to
5857 * use in the operation, but the result is only written when predication
5858 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
5859 * to allow the final write back to the destination to be unconditional.
5860 * For predicated zeroing movprfx, we need to rearrange the helpers to
5861 * allow the final write back to zero inactives.
5863 * In the meantime, just emit the moves.
5866 TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
5867 TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
5868 TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
5871 * SVE2 Integer Multiply - Unpredicated
5874 TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
5876 static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
5877 gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
5878 gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
5880 TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5881 smulh_zzz_fns[a->esz], a, 0)
5883 static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
5884 gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
5885 gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
5887 TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5888 umulh_zzz_fns[a->esz], a, 0)
5890 TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5891 gen_helper_gvec_pmul_b, a, 0)
5893 static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
5894 gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
5895 gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
5897 TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5898 sqdmulh_zzz_fns[a->esz], a, 0)
5900 static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
5901 gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
5902 gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
5904 TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
5905 sqrdmulh_zzz_fns[a->esz], a, 0)
5908 * SVE2 Integer - Predicated
5911 static gen_helper_gvec_4 * const sadlp_fns[4] = {
5912 NULL, gen_helper_sve2_sadalp_zpzz_h,
5913 gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
5915 TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5916 sadlp_fns[a->esz], a, 0)
5918 static gen_helper_gvec_4 * const uadlp_fns[4] = {
5919 NULL, gen_helper_sve2_uadalp_zpzz_h,
5920 gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
5922 TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
5923 uadlp_fns[a->esz], a, 0)
5926 * SVE2 integer unary operations (predicated)
5929 TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
5930 a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
5932 TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
5933 a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
5935 static gen_helper_gvec_3 * const sqabs_fns[4] = {
5936 gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
5937 gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
5939 TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
5941 static gen_helper_gvec_3 * const sqneg_fns[4] = {
5942 gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
5943 gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
5945 TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
5947 DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
5948 DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
5949 DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
5951 DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
5952 DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
5953 DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
5955 DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
5956 DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
5957 DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
5959 DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
5960 DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
5961 DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
5963 DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
5964 DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
5965 DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
5966 DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
5967 DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
5969 DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
5970 DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
5971 DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
5972 DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
5973 DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
5974 DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
5977 * SVE2 Widening Integer Arithmetic
5980 static gen_helper_gvec_3 * const saddl_fns[4] = {
5981 NULL, gen_helper_sve2_saddl_h,
5982 gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
5984 TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5985 saddl_fns[a->esz], a, 0)
5986 TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5987 saddl_fns[a->esz], a, 3)
5988 TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
5989 saddl_fns[a->esz], a, 2)
5991 static gen_helper_gvec_3 * const ssubl_fns[4] = {
5992 NULL, gen_helper_sve2_ssubl_h,
5993 gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
5995 TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
5996 ssubl_fns[a->esz], a, 0)
5997 TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
5998 ssubl_fns[a->esz], a, 3)
5999 TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
6000 ssubl_fns[a->esz], a, 2)
6001 TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
6002 ssubl_fns[a->esz], a, 1)
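/*
 * As the data values above suggest, the trailing argument packs the two
 * top/bottom selectors: bit 0 selects the odd (top) halves of Zn, bit 1
 * those of Zm.  Hence SSUBLB passes 0, SSUBLT 3, SSUBLBT 2 and SSUBLTB 1.
 */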
6004 static gen_helper_gvec_3 * const sabdl_fns[4] = {
6005 NULL, gen_helper_sve2_sabdl_h,
6006 gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
6008 TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6009 sabdl_fns[a->esz], a, 0)
6010 TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6011 sabdl_fns[a->esz], a, 3)
6013 static gen_helper_gvec_3 * const uaddl_fns[4] = {
6014 NULL, gen_helper_sve2_uaddl_h,
6015 gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
6017 TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6018 uaddl_fns[a->esz], a, 0)
6019 TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6020 uaddl_fns[a->esz], a, 3)
6022 static gen_helper_gvec_3 * const usubl_fns[4] = {
6023 NULL, gen_helper_sve2_usubl_h,
6024 gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
6026 TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6027 usubl_fns[a->esz], a, 0)
6028 TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6029 usubl_fns[a->esz], a, 3)
6031 static gen_helper_gvec_3 * const uabdl_fns[4] = {
6032 NULL, gen_helper_sve2_uabdl_h,
6033 gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
6035 TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
6036 uabdl_fns[a->esz], a, 0)
6037 TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
6038 uabdl_fns[a->esz], a, 3)
6040 static gen_helper_gvec_3 * const sqdmull_fns[4] = {
6041 NULL, gen_helper_sve2_sqdmull_zzz_h,
6042 gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
6044 TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6045 sqdmull_fns[a->esz], a, 0)
6046 TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6047 sqdmull_fns[a->esz], a, 3)
6049 static gen_helper_gvec_3 * const smull_fns[4] = {
6050 NULL, gen_helper_sve2_smull_zzz_h,
6051 gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
6053 TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6054 smull_fns[a->esz], a, 0)
6055 TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6056 smull_fns[a->esz], a, 3)
6058 static gen_helper_gvec_3 * const umull_fns[4] = {
6059 NULL, gen_helper_sve2_umull_zzz_h,
6060 gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
6062 TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6063 umull_fns[a->esz], a, 0)
6064 TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
6065 umull_fns[a->esz], a, 3)
6067 static gen_helper_gvec_3 * const eoril_fns[4] = {
6068 gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6069 gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6071 TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
6072 TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
6074 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
6076 static gen_helper_gvec_3 * const fns[4] = {
6077 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
6078 NULL, gen_helper_sve2_pmull_d,
6080 if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
6081 return false;
6083 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
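/*
 * Note the fns[] indexing above: esz == 1 and esz == 3 are the 8->16 and
 * 32->64 bit polynomial multiplies, esz == 0 appears to be the 64->128 bit
 * form (hence the aa64_sve2_pmull128 gate), and esz == 2 has no encoding.
 */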
6086 TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
6087 TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
6089 static gen_helper_gvec_3 * const saddw_fns[4] = {
6090 NULL, gen_helper_sve2_saddw_h,
6091 gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
6093 TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
6094 TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
6096 static gen_helper_gvec_3 * const ssubw_fns[4] = {
6097 NULL, gen_helper_sve2_ssubw_h,
6098 gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
6100 TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
6101 TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
6103 static gen_helper_gvec_3 * const uaddw_fns[4] = {
6104 NULL, gen_helper_sve2_uaddw_h,
6105 gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
6107 TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
6108 TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
6110 static gen_helper_gvec_3 * const usubw_fns[4] = {
6111 NULL, gen_helper_sve2_usubw_h,
6112 gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
6114 TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
6115 TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
6117 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6119 int top = imm & 1;
6120 int shl = imm >> 1;
6121 int halfbits = 4 << vece;
6123 if (top) {
6124 if (shl == halfbits) {
6125 TCGv_vec t = tcg_temp_new_vec_matching(d);
6126 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6127 tcg_gen_and_vec(vece, d, n, t);
6128 tcg_temp_free_vec(t);
6129 } else {
6130 tcg_gen_sari_vec(vece, d, n, halfbits);
6131 tcg_gen_shli_vec(vece, d, d, shl);
6133 } else {
6134 tcg_gen_shli_vec(vece, d, n, halfbits);
6135 tcg_gen_sari_vec(vece, d, d, halfbits - shl);
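/*
 * Worked example for the bottom (top == 0) path above: with vece == MO_16
 * (halfbits == 8) and shl == 2, an input byte 0x80 is moved to bits [15:8]
 * and then arithmetically shifted right by 6, producing 0xfe00 == -512 ==
 * -128 << 2, i.e. sign-extend then shift left in two operations.
 */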
6139 static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
6141 int halfbits = 4 << vece;
6142 int top = imm & 1;
6143 int shl = (imm >> 1);
6144 int shift;
6145 uint64_t mask;
6147 mask = MAKE_64BIT_MASK(0, halfbits);
6148 mask <<= shl;
6149 mask = dup_const(vece, mask);
6151 shift = shl - top * halfbits;
6152 if (shift < 0) {
6153 tcg_gen_shri_i64(d, n, -shift);
6154 } else {
6155 tcg_gen_shli_i64(d, n, shift);
6157 tcg_gen_andi_i64(d, d, mask);
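/*
 * The mask/shift pair above folds widen + shift into a single shift:
 * e.g. vece == MO_16, top == 0, shl == 2 gives mask 0x03fc per lane and
 * a left shift by 2, leaving each low byte zero-extended and shifted;
 * for top == 1 the net shift is negative, so the high byte is shifted
 * right into place instead.
 */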
6160 static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6162 gen_ushll_i64(MO_16, d, n, imm);
6165 static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6167 gen_ushll_i64(MO_32, d, n, imm);
6170 static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
6172 gen_ushll_i64(MO_64, d, n, imm);
6175 static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
6177 int halfbits = 4 << vece;
6178 int top = imm & 1;
6179 int shl = imm >> 1;
6181 if (top) {
6182 if (shl == halfbits) {
6183 TCGv_vec t = tcg_temp_new_vec_matching(d);
6184 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
6185 tcg_gen_and_vec(vece, d, n, t);
6186 tcg_temp_free_vec(t);
6187 } else {
6188 tcg_gen_shri_vec(vece, d, n, halfbits);
6189 tcg_gen_shli_vec(vece, d, d, shl);
6191 } else {
6192 if (shl == 0) {
6193 TCGv_vec t = tcg_temp_new_vec_matching(d);
6194 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6195 tcg_gen_and_vec(vece, d, n, t);
6196 tcg_temp_free_vec(t);
6197 } else {
6198 tcg_gen_shli_vec(vece, d, n, halfbits);
6199 tcg_gen_shri_vec(vece, d, d, halfbits - shl);
6204 static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
6205 bool sel, bool uns)
6207 static const TCGOpcode sshll_list[] = {
6208 INDEX_op_shli_vec, INDEX_op_sari_vec, 0
6210 static const TCGOpcode ushll_list[] = {
6211 INDEX_op_shli_vec, INDEX_op_shri_vec, 0
6213 static const GVecGen2i ops[2][3] = {
6214 { { .fniv = gen_sshll_vec,
6215 .opt_opc = sshll_list,
6216 .fno = gen_helper_sve2_sshll_h,
6217 .vece = MO_16 },
6218 { .fniv = gen_sshll_vec,
6219 .opt_opc = sshll_list,
6220 .fno = gen_helper_sve2_sshll_s,
6221 .vece = MO_32 },
6222 { .fniv = gen_sshll_vec,
6223 .opt_opc = sshll_list,
6224 .fno = gen_helper_sve2_sshll_d,
6225 .vece = MO_64 } },
6226 { { .fni8 = gen_ushll16_i64,
6227 .fniv = gen_ushll_vec,
6228 .opt_opc = ushll_list,
6229 .fno = gen_helper_sve2_ushll_h,
6230 .vece = MO_16 },
6231 { .fni8 = gen_ushll32_i64,
6232 .fniv = gen_ushll_vec,
6233 .opt_opc = ushll_list,
6234 .fno = gen_helper_sve2_ushll_s,
6235 .vece = MO_32 },
6236 { .fni8 = gen_ushll64_i64,
6237 .fniv = gen_ushll_vec,
6238 .opt_opc = ushll_list,
6239 .fno = gen_helper_sve2_ushll_d,
6240 .vece = MO_64 } },
6243 if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
6244 return false;
6246 if (sve_access_check(s)) {
6247 unsigned vsz = vec_full_reg_size(s);
6248 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6249 vec_full_reg_offset(s, a->rn),
6250 vsz, vsz, (a->imm << 1) | sel,
6251 &ops[uns][a->esz]);
6253 return true;
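/*
 * The immediate passed to tcg_gen_gvec_2i above packs the decode: bit 0
 * carries sel (top half) and the remaining bits the shift count, matching
 * the "imm & 1" / "imm >> 1" unpacking in the gen_*shll_* expanders.
 */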
6256 static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
6258 return do_sve2_shll_tb(s, a, false, false);
6261 static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
6263 return do_sve2_shll_tb(s, a, true, false);
6266 static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
6268 return do_sve2_shll_tb(s, a, false, true);
6271 static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
6273 return do_sve2_shll_tb(s, a, true, true);
6276 static gen_helper_gvec_3 * const bext_fns[4] = {
6277 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
6278 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
6280 TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6281 bext_fns[a->esz], a, 0)
6283 static gen_helper_gvec_3 * const bdep_fns[4] = {
6284 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
6285 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
6287 TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6288 bdep_fns[a->esz], a, 0)
6290 static gen_helper_gvec_3 * const bgrp_fns[4] = {
6291 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
6292 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
6294 TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
6295 bgrp_fns[a->esz], a, 0)
6297 static gen_helper_gvec_3 * const cadd_fns[4] = {
6298 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
6299 gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
6301 TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6302 cadd_fns[a->esz], a, 0)
6303 TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6304 cadd_fns[a->esz], a, 1)
6306 static gen_helper_gvec_3 * const sqcadd_fns[4] = {
6307 gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
6308 gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
6310 TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
6311 sqcadd_fns[a->esz], a, 0)
6312 TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
6313 sqcadd_fns[a->esz], a, 1)
6315 static gen_helper_gvec_4 * const sabal_fns[4] = {
6316 NULL, gen_helper_sve2_sabal_h,
6317 gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
6319 TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
6320 TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
6322 static gen_helper_gvec_4 * const uabal_fns[4] = {
6323 NULL, gen_helper_sve2_uabal_h,
6324 gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
6326 TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
6327 TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
6329 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
6331 static gen_helper_gvec_4 * const fns[2] = {
6332 gen_helper_sve2_adcl_s,
6333 gen_helper_sve2_adcl_d,
6336 * Note that in this case the ESZ field encodes both size and sign.
6337 * Split out 'subtract' into bit 1 of the data field for the helper.
6339 return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
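/*
 * E.g. esz == 3 (per the note above, the subtracting encodings reuse this
 * path) selects the 64-bit helper via esz & 1 and sets the subtract flag
 * in bit 1 of data via esz & 2; ADCLT then adds sel in bit 0.
 */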
6342 TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
6343 TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
6345 TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
6346 TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
6347 TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
6348 TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
6349 TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
6350 TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
6352 TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
6353 TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
6355 static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
6356 const GVecGen2 ops[3])
6358 if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
6359 !dc_isar_feature(aa64_sve2, s)) {
6360 return false;
6362 if (sve_access_check(s)) {
6363 unsigned vsz = vec_full_reg_size(s);
6364 tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
6365 vec_full_reg_offset(s, a->rn),
6366 vsz, vsz, &ops[a->esz]);
6368 return true;
6371 static const TCGOpcode sqxtn_list[] = {
6372 INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
6375 static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6377 TCGv_vec t = tcg_temp_new_vec_matching(d);
6378 int halfbits = 4 << vece;
6379 int64_t mask = (1ull << halfbits) - 1;
6380 int64_t min = -1ull << (halfbits - 1);
6381 int64_t max = -min - 1;
6383 tcg_gen_dupi_vec(vece, t, min);
6384 tcg_gen_smax_vec(vece, d, n, t);
6385 tcg_gen_dupi_vec(vece, t, max);
6386 tcg_gen_smin_vec(vece, d, d, t);
6387 tcg_gen_dupi_vec(vece, t, mask);
6388 tcg_gen_and_vec(vece, d, d, t);
6389 tcg_temp_free_vec(t);
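/*
 * I.e. each wide element is clamped to the signed range of the half width
 * and the odd halves are zeroed: for vece == MO_16 the bounds are
 * [-128, 127] and the mask 0x00ff, so a lane 0x1234 saturates to 0x007f.
 */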
6392 static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
6394 static const GVecGen2 ops[3] = {
6395 { .fniv = gen_sqxtnb_vec,
6396 .opt_opc = sqxtn_list,
6397 .fno = gen_helper_sve2_sqxtnb_h,
6398 .vece = MO_16 },
6399 { .fniv = gen_sqxtnb_vec,
6400 .opt_opc = sqxtn_list,
6401 .fno = gen_helper_sve2_sqxtnb_s,
6402 .vece = MO_32 },
6403 { .fniv = gen_sqxtnb_vec,
6404 .opt_opc = sqxtn_list,
6405 .fno = gen_helper_sve2_sqxtnb_d,
6406 .vece = MO_64 },
6408 return do_sve2_narrow_extract(s, a, ops);
6411 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6413 TCGv_vec t = tcg_temp_new_vec_matching(d);
6414 int halfbits = 4 << vece;
6415 int64_t mask = (1ull << halfbits) - 1;
6416 int64_t min = -1ull << (halfbits - 1);
6417 int64_t max = -min - 1;
6419 tcg_gen_dupi_vec(vece, t, min);
6420 tcg_gen_smax_vec(vece, n, n, t);
6421 tcg_gen_dupi_vec(vece, t, max);
6422 tcg_gen_smin_vec(vece, n, n, t);
6423 tcg_gen_shli_vec(vece, n, n, halfbits);
6424 tcg_gen_dupi_vec(vece, t, mask);
6425 tcg_gen_bitsel_vec(vece, d, t, d, n);
6426 tcg_temp_free_vec(t);
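/*
 * The bitsel with the low-half mask merges the result into the odd halves
 * only: d = (d & mask) | (n & ~mask), so the even halves already in d
 * (e.g. written by a preceding SQXTNB, and loaded via .load_dest below)
 * are preserved.
 */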
6429 static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
6431 static const GVecGen2 ops[3] = {
6432 { .fniv = gen_sqxtnt_vec,
6433 .opt_opc = sqxtn_list,
6434 .load_dest = true,
6435 .fno = gen_helper_sve2_sqxtnt_h,
6436 .vece = MO_16 },
6437 { .fniv = gen_sqxtnt_vec,
6438 .opt_opc = sqxtn_list,
6439 .load_dest = true,
6440 .fno = gen_helper_sve2_sqxtnt_s,
6441 .vece = MO_32 },
6442 { .fniv = gen_sqxtnt_vec,
6443 .opt_opc = sqxtn_list,
6444 .load_dest = true,
6445 .fno = gen_helper_sve2_sqxtnt_d,
6446 .vece = MO_64 },
6448 return do_sve2_narrow_extract(s, a, ops);
6451 static const TCGOpcode uqxtn_list[] = {
6452 INDEX_op_shli_vec, INDEX_op_umin_vec, 0
6455 static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6457 TCGv_vec t = tcg_temp_new_vec_matching(d);
6458 int halfbits = 4 << vece;
6459 int64_t max = (1ull << halfbits) - 1;
6461 tcg_gen_dupi_vec(vece, t, max);
6462 tcg_gen_umin_vec(vece, d, n, t);
6463 tcg_temp_free_vec(t);
6466 static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
6468 static const GVecGen2 ops[3] = {
6469 { .fniv = gen_uqxtnb_vec,
6470 .opt_opc = uqxtn_list,
6471 .fno = gen_helper_sve2_uqxtnb_h,
6472 .vece = MO_16 },
6473 { .fniv = gen_uqxtnb_vec,
6474 .opt_opc = uqxtn_list,
6475 .fno = gen_helper_sve2_uqxtnb_s,
6476 .vece = MO_32 },
6477 { .fniv = gen_uqxtnb_vec,
6478 .opt_opc = uqxtn_list,
6479 .fno = gen_helper_sve2_uqxtnb_d,
6480 .vece = MO_64 },
6482 return do_sve2_narrow_extract(s, a, ops);
6485 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6487 TCGv_vec t = tcg_temp_new_vec_matching(d);
6488 int halfbits = 4 << vece;
6489 int64_t max = (1ull << halfbits) - 1;
6491 tcg_gen_dupi_vec(vece, t, max);
6492 tcg_gen_umin_vec(vece, n, n, t);
6493 tcg_gen_shli_vec(vece, n, n, halfbits);
6494 tcg_gen_bitsel_vec(vece, d, t, d, n);
6495 tcg_temp_free_vec(t);
6498 static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
6500 static const GVecGen2 ops[3] = {
6501 { .fniv = gen_uqxtnt_vec,
6502 .opt_opc = uqxtn_list,
6503 .load_dest = true,
6504 .fno = gen_helper_sve2_uqxtnt_h,
6505 .vece = MO_16 },
6506 { .fniv = gen_uqxtnt_vec,
6507 .opt_opc = uqxtn_list,
6508 .load_dest = true,
6509 .fno = gen_helper_sve2_uqxtnt_s,
6510 .vece = MO_32 },
6511 { .fniv = gen_uqxtnt_vec,
6512 .opt_opc = uqxtn_list,
6513 .load_dest = true,
6514 .fno = gen_helper_sve2_uqxtnt_d,
6515 .vece = MO_64 },
6517 return do_sve2_narrow_extract(s, a, ops);
6520 static const TCGOpcode sqxtun_list[] = {
6521 INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
6524 static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6526 TCGv_vec t = tcg_temp_new_vec_matching(d);
6527 int halfbits = 4 << vece;
6528 int64_t max = (1ull << halfbits) - 1;
6530 tcg_gen_dupi_vec(vece, t, 0);
6531 tcg_gen_smax_vec(vece, d, n, t);
6532 tcg_gen_dupi_vec(vece, t, max);
6533 tcg_gen_umin_vec(vece, d, d, t);
6534 tcg_temp_free_vec(t);
6537 static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
6539 static const GVecGen2 ops[3] = {
6540 { .fniv = gen_sqxtunb_vec,
6541 .opt_opc = sqxtun_list,
6542 .fno = gen_helper_sve2_sqxtunb_h,
6543 .vece = MO_16 },
6544 { .fniv = gen_sqxtunb_vec,
6545 .opt_opc = sqxtun_list,
6546 .fno = gen_helper_sve2_sqxtunb_s,
6547 .vece = MO_32 },
6548 { .fniv = gen_sqxtunb_vec,
6549 .opt_opc = sqxtun_list,
6550 .fno = gen_helper_sve2_sqxtunb_d,
6551 .vece = MO_64 },
6553 return do_sve2_narrow_extract(s, a, ops);
6556 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
6558 TCGv_vec t = tcg_temp_new_vec_matching(d);
6559 int halfbits = 4 << vece;
6560 int64_t max = (1ull << halfbits) - 1;
6562 tcg_gen_dupi_vec(vece, t, 0);
6563 tcg_gen_smax_vec(vece, n, n, t);
6564 tcg_gen_dupi_vec(vece, t, max);
6565 tcg_gen_umin_vec(vece, n, n, t);
6566 tcg_gen_shli_vec(vece, n, n, halfbits);
6567 tcg_gen_bitsel_vec(vece, d, t, d, n);
6568 tcg_temp_free_vec(t);
6571 static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
6573 static const GVecGen2 ops[3] = {
6574 { .fniv = gen_sqxtunt_vec,
6575 .opt_opc = sqxtun_list,
6576 .load_dest = true,
6577 .fno = gen_helper_sve2_sqxtunt_h,
6578 .vece = MO_16 },
6579 { .fniv = gen_sqxtunt_vec,
6580 .opt_opc = sqxtun_list,
6581 .load_dest = true,
6582 .fno = gen_helper_sve2_sqxtunt_s,
6583 .vece = MO_32 },
6584 { .fniv = gen_sqxtunt_vec,
6585 .opt_opc = sqxtun_list,
6586 .load_dest = true,
6587 .fno = gen_helper_sve2_sqxtunt_d,
6588 .vece = MO_64 },
6590 return do_sve2_narrow_extract(s, a, ops);
6593 static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
6594 const GVecGen2i ops[3])
6596 if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
6597 return false;
6599 assert(a->imm > 0 && a->imm <= (8 << a->esz));
6600 if (sve_access_check(s)) {
6601 unsigned vsz = vec_full_reg_size(s);
6602 tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
6603 vec_full_reg_offset(s, a->rn),
6604 vsz, vsz, a->imm, &ops[a->esz]);
6606 return true;
6609 static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6611 int halfbits = 4 << vece;
6612 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6614 tcg_gen_shri_i64(d, n, shr);
6615 tcg_gen_andi_i64(d, d, mask);
6618 static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6620 gen_shrnb_i64(MO_16, d, n, shr);
6623 static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6625 gen_shrnb_i64(MO_32, d, n, shr);
6628 static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6630 gen_shrnb_i64(MO_64, d, n, shr);
6633 static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6635 TCGv_vec t = tcg_temp_new_vec_matching(d);
6636 int halfbits = 4 << vece;
6637 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6639 tcg_gen_shri_vec(vece, n, n, shr);
6640 tcg_gen_dupi_vec(vece, t, mask);
6641 tcg_gen_and_vec(vece, d, n, t);
6642 tcg_temp_free_vec(t);
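/*
 * E.g. for vece == MO_16 with shr == 4, a lane 0x1234 becomes 0x0123
 * after the shift and 0x0023 after masking: the narrowed byte lands in
 * the even (bottom) half and the odd half is cleared.
 */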
6645 static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
6647 static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
6648 static const GVecGen2i ops[3] = {
6649 { .fni8 = gen_shrnb16_i64,
6650 .fniv = gen_shrnb_vec,
6651 .opt_opc = vec_list,
6652 .fno = gen_helper_sve2_shrnb_h,
6653 .vece = MO_16 },
6654 { .fni8 = gen_shrnb32_i64,
6655 .fniv = gen_shrnb_vec,
6656 .opt_opc = vec_list,
6657 .fno = gen_helper_sve2_shrnb_s,
6658 .vece = MO_32 },
6659 { .fni8 = gen_shrnb64_i64,
6660 .fniv = gen_shrnb_vec,
6661 .opt_opc = vec_list,
6662 .fno = gen_helper_sve2_shrnb_d,
6663 .vece = MO_64 },
6665 return do_sve2_shr_narrow(s, a, ops);
6668 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
6670 int halfbits = 4 << vece;
6671 uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
6673 tcg_gen_shli_i64(n, n, halfbits - shr);
6674 tcg_gen_andi_i64(n, n, ~mask);
6675 tcg_gen_andi_i64(d, d, mask);
6676 tcg_gen_or_i64(d, d, n);
6679 static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6681 gen_shrnt_i64(MO_16, d, n, shr);
6684 static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6686 gen_shrnt_i64(MO_32, d, n, shr);
6689 static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
6691 tcg_gen_shri_i64(n, n, shr);
6692 tcg_gen_deposit_i64(d, d, n, 32, 32);
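/*
 * For 64-bit source elements the masking dance is unnecessary: shift the
 * source down and deposit its low 32 bits straight into bits [63:32] of
 * the destination, preserving the bottom half already in d.
 */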
6695 static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
6697 TCGv_vec t = tcg_temp_new_vec_matching(d);
6698 int halfbits = 4 << vece;
6699 uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
6701 tcg_gen_shli_vec(vece, n, n, halfbits - shr);
6702 tcg_gen_dupi_vec(vece, t, mask);
6703 tcg_gen_bitsel_vec(vece, d, t, d, n);
6704 tcg_temp_free_vec(t);
6707 static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
6709 static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
6710 static const GVecGen2i ops[3] = {
6711 { .fni8 = gen_shrnt16_i64,
6712 .fniv = gen_shrnt_vec,
6713 .opt_opc = vec_list,
6714 .load_dest = true,
6715 .fno = gen_helper_sve2_shrnt_h,
6716 .vece = MO_16 },
6717 { .fni8 = gen_shrnt32_i64,
6718 .fniv = gen_shrnt_vec,
6719 .opt_opc = vec_list,
6720 .load_dest = true,
6721 .fno = gen_helper_sve2_shrnt_s,
6722 .vece = MO_32 },
6723 { .fni8 = gen_shrnt64_i64,
6724 .fniv = gen_shrnt_vec,
6725 .opt_opc = vec_list,
6726 .load_dest = true,
6727 .fno = gen_helper_sve2_shrnt_d,
6728 .vece = MO_64 },
6730 return do_sve2_shr_narrow(s, a, ops);
6733 static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
6735 static const GVecGen2i ops[3] = {
6736 { .fno = gen_helper_sve2_rshrnb_h },
6737 { .fno = gen_helper_sve2_rshrnb_s },
6738 { .fno = gen_helper_sve2_rshrnb_d },
6740 return do_sve2_shr_narrow(s, a, ops);
6743 static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
6745 static const GVecGen2i ops[3] = {
6746 { .fno = gen_helper_sve2_rshrnt_h },
6747 { .fno = gen_helper_sve2_rshrnt_s },
6748 { .fno = gen_helper_sve2_rshrnt_d },
6750 return do_sve2_shr_narrow(s, a, ops);
6753 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
6754 TCGv_vec n, int64_t shr)
6756 TCGv_vec t = tcg_temp_new_vec_matching(d);
6757 int halfbits = 4 << vece;
6759 tcg_gen_sari_vec(vece, n, n, shr);
6760 tcg_gen_dupi_vec(vece, t, 0);
6761 tcg_gen_smax_vec(vece, n, n, t);
6762 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6763 tcg_gen_umin_vec(vece, d, n, t);
6764 tcg_temp_free_vec(t);
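/*
 * I.e. shift, then clamp to the unsigned range of the half width: for
 * vece == MO_16 with shr == 1, a lane 0xfffe (-2) shifts to -1, smax
 * with 0 gives 0, and umin with 0x00ff leaves 0 in the bottom byte; a
 * positive 0x0300 becomes 0x0180 and saturates to 0x00ff.
 */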
6767 static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
6769 static const TCGOpcode vec_list[] = {
6770 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6772 static const GVecGen2i ops[3] = {
6773 { .fniv = gen_sqshrunb_vec,
6774 .opt_opc = vec_list,
6775 .fno = gen_helper_sve2_sqshrunb_h,
6776 .vece = MO_16 },
6777 { .fniv = gen_sqshrunb_vec,
6778 .opt_opc = vec_list,
6779 .fno = gen_helper_sve2_sqshrunb_s,
6780 .vece = MO_32 },
6781 { .fniv = gen_sqshrunb_vec,
6782 .opt_opc = vec_list,
6783 .fno = gen_helper_sve2_sqshrunb_d,
6784 .vece = MO_64 },
6786 return do_sve2_shr_narrow(s, a, ops);
6789 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
6790 TCGv_vec n, int64_t shr)
6792 TCGv_vec t = tcg_temp_new_vec_matching(d);
6793 int halfbits = 4 << vece;
6795 tcg_gen_sari_vec(vece, n, n, shr);
6796 tcg_gen_dupi_vec(vece, t, 0);
6797 tcg_gen_smax_vec(vece, n, n, t);
6798 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6799 tcg_gen_umin_vec(vece, n, n, t);
6800 tcg_gen_shli_vec(vece, n, n, halfbits);
6801 tcg_gen_bitsel_vec(vece, d, t, d, n);
6802 tcg_temp_free_vec(t);
6805 static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
6807 static const TCGOpcode vec_list[] = {
6808 INDEX_op_shli_vec, INDEX_op_sari_vec,
6809 INDEX_op_smax_vec, INDEX_op_umin_vec, 0
6811 static const GVecGen2i ops[3] = {
6812 { .fniv = gen_sqshrunt_vec,
6813 .opt_opc = vec_list,
6814 .load_dest = true,
6815 .fno = gen_helper_sve2_sqshrunt_h,
6816 .vece = MO_16 },
6817 { .fniv = gen_sqshrunt_vec,
6818 .opt_opc = vec_list,
6819 .load_dest = true,
6820 .fno = gen_helper_sve2_sqshrunt_s,
6821 .vece = MO_32 },
6822 { .fniv = gen_sqshrunt_vec,
6823 .opt_opc = vec_list,
6824 .load_dest = true,
6825 .fno = gen_helper_sve2_sqshrunt_d,
6826 .vece = MO_64 },
6828 return do_sve2_shr_narrow(s, a, ops);
6831 static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
6833 static const GVecGen2i ops[3] = {
6834 { .fno = gen_helper_sve2_sqrshrunb_h },
6835 { .fno = gen_helper_sve2_sqrshrunb_s },
6836 { .fno = gen_helper_sve2_sqrshrunb_d },
6838 return do_sve2_shr_narrow(s, a, ops);
6841 static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
6843 static const GVecGen2i ops[3] = {
6844 { .fno = gen_helper_sve2_sqrshrunt_h },
6845 { .fno = gen_helper_sve2_sqrshrunt_s },
6846 { .fno = gen_helper_sve2_sqrshrunt_d },
6848 return do_sve2_shr_narrow(s, a, ops);
6851 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
6852 TCGv_vec n, int64_t shr)
6854 TCGv_vec t = tcg_temp_new_vec_matching(d);
6855 int halfbits = 4 << vece;
6856 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6857 int64_t min = -max - 1;
6859 tcg_gen_sari_vec(vece, n, n, shr);
6860 tcg_gen_dupi_vec(vece, t, min);
6861 tcg_gen_smax_vec(vece, n, n, t);
6862 tcg_gen_dupi_vec(vece, t, max);
6863 tcg_gen_smin_vec(vece, n, n, t);
6864 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6865 tcg_gen_and_vec(vece, d, n, t);
6866 tcg_temp_free_vec(t);
6869 static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
6871 static const TCGOpcode vec_list[] = {
6872 INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6874 static const GVecGen2i ops[3] = {
6875 { .fniv = gen_sqshrnb_vec,
6876 .opt_opc = vec_list,
6877 .fno = gen_helper_sve2_sqshrnb_h,
6878 .vece = MO_16 },
6879 { .fniv = gen_sqshrnb_vec,
6880 .opt_opc = vec_list,
6881 .fno = gen_helper_sve2_sqshrnb_s,
6882 .vece = MO_32 },
6883 { .fniv = gen_sqshrnb_vec,
6884 .opt_opc = vec_list,
6885 .fno = gen_helper_sve2_sqshrnb_d,
6886 .vece = MO_64 },
6888 return do_sve2_shr_narrow(s, a, ops);
6891 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
6892 TCGv_vec n, int64_t shr)
6894 TCGv_vec t = tcg_temp_new_vec_matching(d);
6895 int halfbits = 4 << vece;
6896 int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
6897 int64_t min = -max - 1;
6899 tcg_gen_sari_vec(vece, n, n, shr);
6900 tcg_gen_dupi_vec(vece, t, min);
6901 tcg_gen_smax_vec(vece, n, n, t);
6902 tcg_gen_dupi_vec(vece, t, max);
6903 tcg_gen_smin_vec(vece, n, n, t);
6904 tcg_gen_shli_vec(vece, n, n, halfbits);
6905 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6906 tcg_gen_bitsel_vec(vece, d, t, d, n);
6907 tcg_temp_free_vec(t);
6910 static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
6912 static const TCGOpcode vec_list[] = {
6913 INDEX_op_shli_vec, INDEX_op_sari_vec,
6914 INDEX_op_smax_vec, INDEX_op_smin_vec, 0
6916 static const GVecGen2i ops[3] = {
6917 { .fniv = gen_sqshrnt_vec,
6918 .opt_opc = vec_list,
6919 .load_dest = true,
6920 .fno = gen_helper_sve2_sqshrnt_h,
6921 .vece = MO_16 },
6922 { .fniv = gen_sqshrnt_vec,
6923 .opt_opc = vec_list,
6924 .load_dest = true,
6925 .fno = gen_helper_sve2_sqshrnt_s,
6926 .vece = MO_32 },
6927 { .fniv = gen_sqshrnt_vec,
6928 .opt_opc = vec_list,
6929 .load_dest = true,
6930 .fno = gen_helper_sve2_sqshrnt_d,
6931 .vece = MO_64 },
6933 return do_sve2_shr_narrow(s, a, ops);
6936 static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
6938 static const GVecGen2i ops[3] = {
6939 { .fno = gen_helper_sve2_sqrshrnb_h },
6940 { .fno = gen_helper_sve2_sqrshrnb_s },
6941 { .fno = gen_helper_sve2_sqrshrnb_d },
6943 return do_sve2_shr_narrow(s, a, ops);
6946 static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
6948 static const GVecGen2i ops[3] = {
6949 { .fno = gen_helper_sve2_sqrshrnt_h },
6950 { .fno = gen_helper_sve2_sqrshrnt_s },
6951 { .fno = gen_helper_sve2_sqrshrnt_d },
6953 return do_sve2_shr_narrow(s, a, ops);
6956 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
6957 TCGv_vec n, int64_t shr)
6959 TCGv_vec t = tcg_temp_new_vec_matching(d);
6960 int halfbits = 4 << vece;
6962 tcg_gen_shri_vec(vece, n, n, shr);
6963 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6964 tcg_gen_umin_vec(vece, d, n, t);
6965 tcg_temp_free_vec(t);
6968 static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
6970 static const TCGOpcode vec_list[] = {
6971 INDEX_op_shri_vec, INDEX_op_umin_vec, 0
6973 static const GVecGen2i ops[3] = {
6974 { .fniv = gen_uqshrnb_vec,
6975 .opt_opc = vec_list,
6976 .fno = gen_helper_sve2_uqshrnb_h,
6977 .vece = MO_16 },
6978 { .fniv = gen_uqshrnb_vec,
6979 .opt_opc = vec_list,
6980 .fno = gen_helper_sve2_uqshrnb_s,
6981 .vece = MO_32 },
6982 { .fniv = gen_uqshrnb_vec,
6983 .opt_opc = vec_list,
6984 .fno = gen_helper_sve2_uqshrnb_d,
6985 .vece = MO_64 },
6987 return do_sve2_shr_narrow(s, a, ops);
6990 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
6991 TCGv_vec n, int64_t shr)
6993 TCGv_vec t = tcg_temp_new_vec_matching(d);
6994 int halfbits = 4 << vece;
6996 tcg_gen_shri_vec(vece, n, n, shr);
6997 tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
6998 tcg_gen_umin_vec(vece, n, n, t);
6999 tcg_gen_shli_vec(vece, n, n, halfbits);
7000 tcg_gen_bitsel_vec(vece, d, t, d, n);
7001 tcg_temp_free_vec(t);
7004 static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
7006 static const TCGOpcode vec_list[] = {
7007 INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7009 static const GVecGen2i ops[3] = {
7010 { .fniv = gen_uqshrnt_vec,
7011 .opt_opc = vec_list,
7012 .load_dest = true,
7013 .fno = gen_helper_sve2_uqshrnt_h,
7014 .vece = MO_16 },
7015 { .fniv = gen_uqshrnt_vec,
7016 .opt_opc = vec_list,
7017 .load_dest = true,
7018 .fno = gen_helper_sve2_uqshrnt_s,
7019 .vece = MO_32 },
7020 { .fniv = gen_uqshrnt_vec,
7021 .opt_opc = vec_list,
7022 .load_dest = true,
7023 .fno = gen_helper_sve2_uqshrnt_d,
7024 .vece = MO_64 },
7026 return do_sve2_shr_narrow(s, a, ops);
7029 static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
7031 static const GVecGen2i ops[3] = {
7032 { .fno = gen_helper_sve2_uqrshrnb_h },
7033 { .fno = gen_helper_sve2_uqrshrnb_s },
7034 { .fno = gen_helper_sve2_uqrshrnb_d },
7036 return do_sve2_shr_narrow(s, a, ops);
7039 static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
7041 static const GVecGen2i ops[3] = {
7042 { .fno = gen_helper_sve2_uqrshrnt_h },
7043 { .fno = gen_helper_sve2_uqrshrnt_s },
7044 { .fno = gen_helper_sve2_uqrshrnt_d },
7046 return do_sve2_shr_narrow(s, a, ops);
7049 #define DO_SVE2_ZZZ_NARROW(NAME, name) \
7050 static gen_helper_gvec_3 * const name##_fns[4] = { \
7051 NULL, gen_helper_sve2_##name##_h, \
7052 gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
7053 }; \
7054 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
7055 name##_fns[a->esz], a, 0)
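/*
 * For instance, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) below expands to:
 *
 *   static gen_helper_gvec_3 * const addhnb_fns[4] = {
 *       NULL, gen_helper_sve2_addhnb_h,
 *       gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d,
 *   };
 *   TRANS_FEAT(ADDHNB, aa64_sve2, gen_gvec_ool_arg_zzz,
 *              addhnb_fns[a->esz], a, 0)
 */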
7057 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
7058 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
7059 DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
7060 DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
7062 DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
7063 DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
7064 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
7065 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
7067 static gen_helper_gvec_flags_4 * const match_fns[4] = {
7068 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
7070 TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
7072 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
7073 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
7075 TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
7077 static gen_helper_gvec_4 * const histcnt_fns[4] = {
7078 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
7080 TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
7081 histcnt_fns[a->esz], a, 0)
7083 TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
7084 a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
7086 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
7087 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
7088 DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
7089 DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
7090 DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
7093 * SVE Integer Multiply-Add (unpredicated)
7096 TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
7097 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
7098 TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
7099 a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
7101 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
7102 NULL, gen_helper_sve2_sqdmlal_zzzw_h,
7103 gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
7105 TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7106 sqdmlal_zzzw_fns[a->esz], a, 0)
7107 TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7108 sqdmlal_zzzw_fns[a->esz], a, 3)
7109 TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7110 sqdmlal_zzzw_fns[a->esz], a, 2)
7112 static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
7113 NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
7114 gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
7116 TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7117 sqdmlsl_zzzw_fns[a->esz], a, 0)
7118 TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7119 sqdmlsl_zzzw_fns[a->esz], a, 3)
7120 TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
7121 sqdmlsl_zzzw_fns[a->esz], a, 2)
7123 static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
7124 gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
7125 gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
7127 TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7128 sqrdmlah_fns[a->esz], a, 0)
7130 static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
7131 gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
7132 gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
7134 TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
7135 sqrdmlsh_fns[a->esz], a, 0)
7137 static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
7138 NULL, gen_helper_sve2_smlal_zzzw_h,
7139 gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
7141 TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7142 smlal_zzzw_fns[a->esz], a, 0)
7143 TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7144 smlal_zzzw_fns[a->esz], a, 1)
7146 static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
7147 NULL, gen_helper_sve2_umlal_zzzw_h,
7148 gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
7150 TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7151 umlal_zzzw_fns[a->esz], a, 0)
7152 TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7153 umlal_zzzw_fns[a->esz], a, 1)
7155 static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
7156 NULL, gen_helper_sve2_smlsl_zzzw_h,
7157 gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
7159 TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7160 smlsl_zzzw_fns[a->esz], a, 0)
7161 TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7162 smlsl_zzzw_fns[a->esz], a, 1)
7164 static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
7165 NULL, gen_helper_sve2_umlsl_zzzw_h,
7166 gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
7168 TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7169 umlsl_zzzw_fns[a->esz], a, 0)
7170 TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
7171 umlsl_zzzw_fns[a->esz], a, 1)
7173 static gen_helper_gvec_4 * const cmla_fns[] = {
7174 gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
7175 gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
7177 TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7178 cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7180 static gen_helper_gvec_4 * const cdot_fns[] = {
7181 NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
7183 TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7184 cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7186 static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
7187 gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
7188 gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
7190 TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
7191 sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
7193 TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7194 a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
7196 TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
7197 gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
7199 TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7200 gen_helper_crypto_aese, a, false)
7201 TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
7202 gen_helper_crypto_aese, a, true)
7204 TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7205 gen_helper_crypto_sm4e, a, 0)
7206 TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
7207 gen_helper_crypto_sm4ekey, a, 0)
7209 TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
7211 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
7212 gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
7213 TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
7214 gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
7216 TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
7217 gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
7219 TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
7220 gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
7221 TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
7222 gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
7224 TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
7225 float_round_to_odd, gen_helper_sve_fcvt_ds)
7226 TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
7227 float_round_to_odd, gen_helper_sve2_fcvtnt_ds)
7229 static gen_helper_gvec_3_ptr * const flogb_fns[] = {
7230 NULL, gen_helper_flogb_h,
7231 gen_helper_flogb_s, gen_helper_flogb_d
7233 TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
7234 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
7236 static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
7238 if (!dc_isar_feature(aa64_sve2, s)) {
7239 return false;
7241 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
7242 a->rd, a->rn, a->rm, a->ra,
7243 (sel << 1) | sub, cpu_env);
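/*
 * The data word (sel << 1) | sub distinguishes all four users of the
 * single helper: FMLALB 0, FMLSLB 1, FMLALT 2, FMLSLT 3.
 */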
7246 static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7248 return do_FMLAL_zzzw(s, a, false, false);
7251 static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7253 return do_FMLAL_zzzw(s, a, false, true);
7256 static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
7258 return do_FMLAL_zzzw(s, a, true, false);
7261 static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
7263 return do_FMLAL_zzzw(s, a, true, true);
7266 static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
7268 if (!dc_isar_feature(aa64_sve2, s)) {
7269 return false;
7271 return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
7272 a->rd, a->rn, a->rm, a->ra,
7273 (a->index << 2) | (sel << 1) | sub, cpu_env);
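/*
 * The indexed form above additionally packs the element index above
 * the two selector bits: e.g. index == 3 with FMLSLT gives
 * (3 << 2) | (1 << 1) | 1 == 15.
 */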
7276 static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
7278 return do_FMLAL_zzxw(s, a, false, false);
7281 static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
7283 return do_FMLAL_zzxw(s, a, false, true);
7286 static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
7288 return do_FMLAL_zzxw(s, a, true, false);
7291 static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
7293 return do_FMLAL_zzxw(s, a, true, true);
7296 TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7297 gen_helper_gvec_smmla_b, a, 0)
7298 TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7299 gen_helper_gvec_usmmla_b, a, 0)
7300 TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
7301 gen_helper_gvec_ummla_b, a, 0)
7303 TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7304 gen_helper_gvec_bfdot, a, 0)
7305 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
7306 gen_helper_gvec_bfdot_idx, a)
7308 TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
7309 gen_helper_gvec_bfmmla, a, 0)
7311 static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
7313 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
7314 a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
7317 TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
7318 TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
7320 static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
7322 return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
7323 a->rd, a->rn, a->rm, a->ra,
7324 (a->index << 1) | sel, FPST_FPCR);
7327 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
7328 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)