target/arm: Use TRANS_FEAT for do_FMLAL_zzzw
target/arm/translate-sve.c (qemu/ar7.git)
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}
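
/*
 * Worked example (editorial annotation, not upstream code): a byte
 * element encodes tsz:imm3 = 00001:iii, so x = 8..15.  tszimm_esz
 * discards imm3 (x >> 3 = 1) and 31 - clz32(1) = 0 = MO_8; for
 * x = 13, tszimm_shr gives (16 << 0) - 13 = 3 (shift right by 3)
 * and tszimm_shl gives 13 - 8 = 5 (shift left by 5).
 */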

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
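
/*
 * Example (editorial annotation): x = 0x1ff has the SH bit set, so
 * expand_imm_sh8s yields (int8_t)0xff << 8 = -256, while
 * expand_imm_sh8u yields 0xff << 8 = 0xff00.
 */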

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
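
/*
 * Editorial annotation: dtypes 0, 5, 10 and 15 are the cases where
 * the memory element size equals the register element size, i.e.
 * unsigned LD1B, LD1H, LD1W and LD1D respectively.
 */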

/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
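
/*
 * Example (editorial annotation): at the minimum vector length of
 * 128 bits the predicate register is 16 / 8 = 2 bytes, which
 * size_for_gvec rounds up to 8; at 2048 bits it is 32 bytes,
 * already a multiple of 16 and returned unchanged.
 */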

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}

/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate, plus fpst. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke a vector expander on two Zregs and an immediate.  */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs.  */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs.  */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs.  */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
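
/*
 * Editorial annotation: the helpers return the flags packed into one
 * word using QEMU's split-flag convention -- cpu_NF is significant in
 * bit 31, cpu_ZF is "nonzero means Z clear", cpu_CF holds 0 or 1 --
 * so the whole word feeds N, bit 1 feeds Z, bit 0 feeds C, and
 * PredTest always leaves V clear.
 */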

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
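
/*
 * Editorial annotation: each element owns (1 << esz) predicate bits
 * and only the lowest is significant, hence e.g. 0x5555... for
 * MO_16 (every 2nd bit) and 0x0101... for MO_64 (every 8th bit).
 */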

static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}

void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
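
/*
 * Example (editorial annotation): XAR computes Zd = ROR(Zn ^ Zm, #sh)
 * per element.  For vece = MO_8 and sh = 3, each byte b of n ^ m
 * becomes (b >> 3) | (b << 5); gen_xar8_i64 builds exactly that from
 * 64-bit shifts and masks, since TCG has no byte-wise rotate on i64.
 */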

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}

static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {              \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,          \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,          \
    };                                                                   \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                        \
               name##_zpzz_fns[a->esz], a, 0)

DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {                  \
        gen_helper_##name##_b, gen_helper_##name##_h,                   \
        gen_helper_##name##_s, gen_helper_##name##_d,                   \
    };                                                                  \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL, gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL, gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL, gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL, gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
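
/*
 * Example (editorial annotation): for ASR with esz = MO_8 an encoded
 * shift of 8 (the element size) is clamped to 7, which produces the
 * same arithmetic result; for LSR, LSL and ASRD the same encoding
 * zeros every active element via do_movz_zpz instead.
 */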

static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {              \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,  \
        gen_helper_sve_##name##_zpzw_s, NULL                             \
    };                                                                   \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,             \
               a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {               \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,    \
        gen_helper_sve_##name##_zzw_s, NULL                              \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                     \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])

/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
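
/*
 * Example (editorial annotation): INDEX writes start + i * incr into
 * element i, so INDEX_ii with imm1 = 0, imm2 = 1 and esz = MO_32
 * fills the 32-bit lanes with 0, 1, 2, ...
 */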

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL, gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
           fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL, gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
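
/*
 * Worked example (editorial annotation): fullsz = 32 bytes (VL 256)
 * with esz = MO_16 gives 16 elements.  VL16 (0x9) bounds at 16 <= 16,
 * so 16; VL32 (0xa) bounds at 32 > 16, so 0; MUL3 (0x1e) gives
 * 16 - 16 % 3 = 15.
 */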

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)

/* Note pat == 31 is #all, to set all elements.  */
TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)

/* Note pat == 32 is #unimp, to set no elements.  */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
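
/*
 * Example (editorial annotation): with u = d = false and reg holding
 * INT32_MAX, the 64-bit add exceeds INT32_MAX and the smin clamps it
 * back, so SQINC*-style ops saturate rather than wrap.
 */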

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}

TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies. This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
2150 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2151 static gen_cpy * const fns[4] = {
2152 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2153 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2155 unsigned vsz = vec_full_reg_size(s);
2156 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2157 TCGv_ptr t_zd = tcg_temp_new_ptr();
2158 TCGv_ptr t_zn = tcg_temp_new_ptr();
2159 TCGv_ptr t_pg = tcg_temp_new_ptr();
2161 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2162 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2163 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2165 fns[esz](t_zd, t_zn, t_pg, val, desc);
2167 tcg_temp_free_ptr(t_zd);
2168 tcg_temp_free_ptr(t_zn);
2169 tcg_temp_free_ptr(t_pg);
2172 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2174 if (a->esz == 0) {
2175 return false;
2177 if (sve_access_check(s)) {
2178 /* Decode the VFP immediate. */
2179 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2180 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
2182 return true;
2185 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2187 if (sve_access_check(s)) {
2188 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
2190 return true;
2193 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2195 static gen_helper_gvec_2i * const fns[4] = {
2196 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2197 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2200 if (sve_access_check(s)) {
2201 unsigned vsz = vec_full_reg_size(s);
2202 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2203 pred_full_reg_offset(s, a->pg),
2204 tcg_constant_i64(a->imm),
2205 vsz, vsz, 0, fns[a->esz]);
    return true;
}

/*
 *** SVE Permute Extract Group
 */
2214 static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
2216 if (!sve_access_check(s)) {
2217 return true;
2220 unsigned vsz = vec_full_reg_size(s);
2221 unsigned n_ofs = imm >= vsz ? 0 : imm;
2222 unsigned n_siz = vsz - n_ofs;
2223 unsigned d = vec_full_reg_offset(s, rd);
2224 unsigned n = vec_full_reg_offset(s, rn);
2225 unsigned m = vec_full_reg_offset(s, rm);
    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
2230 if (m != d
2231 && n_ofs == size_for_gvec(n_ofs)
2232 && n_siz == size_for_gvec(n_siz)
2233 && (d != n || n_siz <= n_ofs)) {
2234 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2235 if (n_ofs != 0) {
2236 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2238 } else {
2239 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    return true;
}
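/*
 * EXT notionally concatenates the two sources and extracts vsz bytes
 * starting at byte imm: e.g. with vsz = 16 and imm = 3, the result is
 * bytes 3..15 of the first source followed by bytes 0..2 of the
 * second, which is what the two-move fast path above reproduces.
 */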
2244 TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
2245 TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
/*
 *** SVE Permute - Unpredicated Group
 */
2251 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2256 vsz, vsz, cpu_reg_sp(s, a->rn));
2258 return true;
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
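/*
 * The imm field above packs the element size and index together,
 * with the lowest set bit marking the size: e.g. imm = 0b10100
 * gives esz = ctz32(imm) = 2 (32-bit elements) and
 * index = imm >> 3 = 2, i.e. splat element 2 of Zn.
 */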
2288 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2290 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2291 static gen_insr * const fns[4] = {
2292 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2293 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2295 unsigned vsz = vec_full_reg_size(s);
2296 TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
2297 TCGv_ptr t_zd = tcg_temp_new_ptr();
2298 TCGv_ptr t_zn = tcg_temp_new_ptr();
2300 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2301 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2303 fns[a->esz](t_zd, t_zn, val, desc);
2305 tcg_temp_free_ptr(t_zd);
2306 tcg_temp_free_ptr(t_zn);
2309 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2311 if (sve_access_check(s)) {
2312 TCGv_i64 t = tcg_temp_new_i64();
2313 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2314 do_insr_i64(s, a, t);
2315 tcg_temp_free_i64(t);
2317 return true;
2320 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2322 if (sve_access_check(s)) {
2323 do_insr_i64(s, a, cpu_reg(s, a->rm));
2325 return true;
2328 static gen_helper_gvec_2 * const rev_fns[4] = {
2329 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2330 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2332 TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
2334 static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
2335 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2336 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2338 TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
2340 static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
2341 gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2342 gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2344 TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
2345 a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
2347 static gen_helper_gvec_3 * const tbx_fns[4] = {
2348 gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2349 gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2351 TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
2353 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2355 static gen_helper_gvec_2 * const fns[4][2] = {
2356 { NULL, NULL },
2357 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2358 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2359 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2362 if (a->esz == 0) {
2363 return false;
2365 if (sve_access_check(s)) {
2366 unsigned vsz = vec_full_reg_size(s);
2367 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2368 vec_full_reg_offset(s, a->rn)
2369 + (a->h ? vsz / 2 : 0),
2370 vsz, vsz, 0, fns[a->esz][a->u]);
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */
2379 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2380 gen_helper_gvec_3 *fn)
2382 if (!sve_access_check(s)) {
2383 return true;
2386 unsigned vsz = pred_full_reg_size(s);
2388 TCGv_ptr t_d = tcg_temp_new_ptr();
2389 TCGv_ptr t_n = tcg_temp_new_ptr();
2390 TCGv_ptr t_m = tcg_temp_new_ptr();
2391 uint32_t desc = 0;
2393 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2394 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2395 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2397 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2398 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2399 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2401 fn(t_d, t_n, t_m, tcg_constant_i32(desc));
2403 tcg_temp_free_ptr(t_d);
2404 tcg_temp_free_ptr(t_n);
2405 tcg_temp_free_ptr(t_m);
2406 return true;
2409 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2410 gen_helper_gvec_2 *fn)
2412 if (!sve_access_check(s)) {
2413 return true;
2416 unsigned vsz = pred_full_reg_size(s);
2417 TCGv_ptr t_d = tcg_temp_new_ptr();
2418 TCGv_ptr t_n = tcg_temp_new_ptr();
2419 uint32_t desc = 0;
2421 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2422 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2424 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2425 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2426 desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2428 fn(t_d, t_n, tcg_constant_i32(desc));
2430 tcg_temp_free_ptr(t_d);
2431 tcg_temp_free_ptr(t_n);
2432 return true;
2435 TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
2436 TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
2437 TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
2438 TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
2439 TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
2440 TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
2442 TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
2443 TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
2444 TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
/*
 *** SVE Permute - Interleaving Group
 */
2450 static gen_helper_gvec_3 * const zip_fns[4] = {
2451 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2452 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2454 TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2455 zip_fns[a->esz], a, 0)
2456 TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2457 zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
2459 TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2460 gen_helper_sve2_zip_q, a, 0)
2461 TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2462 gen_helper_sve2_zip_q, a,
2463 QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
2465 static gen_helper_gvec_3 * const uzp_fns[4] = {
2466 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2467 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2470 TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2471 uzp_fns[a->esz], a, 0)
2472 TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2473 uzp_fns[a->esz], a, 1 << a->esz)
2475 TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2476 gen_helper_sve2_uzp_q, a, 0)
2477 TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2478 gen_helper_sve2_uzp_q, a, 16)
2480 static gen_helper_gvec_3 * const trn_fns[4] = {
2481 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2482 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2485 TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
2486 trn_fns[a->esz], a, 0)
2487 TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
2488 trn_fns[a->esz], a, 1 << a->esz)
2490 TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2491 gen_helper_sve2_trn_q, a, 0)
2492 TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
2493 gen_helper_sve2_trn_q, a, 16)
/*
 *** SVE Permute Vector - Predicated Group
 */
2499 static gen_helper_gvec_3 * const compact_fns[4] = {
2500 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2502 TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size. This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
2508 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
    /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
2513 TCGv_ptr t_p = tcg_temp_new_ptr();
2514 unsigned desc = 0;
2516 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2517 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2519 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2521 gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
2523 tcg_temp_free_ptr(t_p);
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
2529 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2531 unsigned vsz = vec_full_reg_size(s);
2533 tcg_gen_addi_i32(last, last, 1 << esz);
2534 if (is_power_of_2(vsz)) {
2535 tcg_gen_andi_i32(last, last, vsz - 1);
2536 } else {
2537 TCGv_i32 max = tcg_constant_i32(vsz);
2538 TCGv_i32 zero = tcg_constant_i32(0);
2539 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2543 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2544 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2546 unsigned vsz = vec_full_reg_size(s);
2548 if (is_power_of_2(vsz)) {
2549 tcg_gen_andi_i32(last, last, vsz - 1);
2550 } else {
2551 TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
2552 TCGv_i32 zero = tcg_constant_i32(0);
2553 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2557 /* Load an unsigned element of ESZ from BASE+OFS. */
2558 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2560 TCGv_i64 r = tcg_temp_new_i64();
2562 switch (esz) {
2563 case 0:
2564 tcg_gen_ld8u_i64(r, base, ofs);
2565 break;
2566 case 1:
2567 tcg_gen_ld16u_i64(r, base, ofs);
2568 break;
2569 case 2:
2570 tcg_gen_ld32u_i64(r, base, ofs);
2571 break;
2572 case 3:
2573 tcg_gen_ld_i64(r, base, ofs);
2574 break;
2575 default:
2576 g_assert_not_reached();
2578 return r;
2581 /* Load an unsigned element of ESZ from RM[LAST]. */
2582 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2583 int rm, int esz)
2585 TCGv_ptr p = tcg_temp_new_ptr();
2586 TCGv_i64 r;
    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
2592 #if HOST_BIG_ENDIAN
2593 /* Adjust for element ordering. See vec_reg_offset. */
2594 if (esz < 3) {
2595 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2597 #endif
2598 tcg_gen_ext_i32_ptr(p, last);
2599 tcg_gen_add_ptr(p, p, cpu_env);
2601 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2602 tcg_temp_free_ptr(p);
    return r;
}
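/*
 * The big-endian fixup above works because elements are stored in
 * host order within each 64-bit word: e.g. on a big-endian host,
 * byte element 0 lives at offset 7 within its word, and
 * 0 ^ (8 - (1 << MO_8)) is exactly 7.
 */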
2607 /* Compute CLAST for a Zreg. */
2608 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2610 TCGv_i32 last;
2611 TCGLabel *over;
2612 TCGv_i64 ele;
2613 unsigned vsz, esz = a->esz;
2615 if (!sve_access_check(s)) {
2616 return true;
2619 last = tcg_temp_local_new_i32();
2620 over = gen_new_label();
2622 find_last_active(s, last, esz, a->pg);
    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
2627 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2629 if (!before) {
2630 incr_last_active(s, last, esz);
2633 ele = load_last_active(s, last, a->rm, esz);
2634 tcg_temp_free_i32(last);
2636 vsz = vec_full_reg_size(s);
2637 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2638 tcg_temp_free_i64(ele);
2640 /* If this insn used MOVPRFX, we may need a second move. */
2641 if (a->rd != a->rn) {
2642 TCGLabel *done = gen_new_label();
2643 tcg_gen_br(done);
2645 gen_set_label(over);
2646 do_mov_z(s, a->rd, a->rn);
2648 gen_set_label(done);
2649 } else {
2650 gen_set_label(over);
2652 return true;
2655 TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
2656 TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
2658 /* Compute CLAST for a scalar. */
2659 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2660 bool before, TCGv_i64 reg_val)
2662 TCGv_i32 last = tcg_temp_new_i32();
2663 TCGv_i64 ele, cmp;
2665 find_last_active(s, last, esz, pg);
2667 /* Extend the original value of last prior to incrementing. */
2668 cmp = tcg_temp_new_i64();
2669 tcg_gen_ext_i32_i64(cmp, last);
2671 if (!before) {
2672 incr_last_active(s, last, esz);
    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage. We then discard the garbage with
     * a conditional move.
     */
2680 ele = load_last_active(s, last, rm, esz);
2681 tcg_temp_free_i32(last);
2683 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
2684 ele, reg_val);
2686 tcg_temp_free_i64(cmp);
2687 tcg_temp_free_i64(ele);
2690 /* Compute CLAST for a Vreg. */
2691 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2693 if (sve_access_check(s)) {
2694 int esz = a->esz;
2695 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2696 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2698 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2699 write_fp_dreg(s, a->rd, reg);
2700 tcg_temp_free_i64(reg);
2702 return true;
2705 TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
2706 TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
2708 /* Compute CLAST for a Xreg. */
2709 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2711 TCGv_i64 reg;
2713 if (!sve_access_check(s)) {
2714 return true;
2717 reg = cpu_reg(s, a->rd);
2718 switch (a->esz) {
2719 case 0:
2720 tcg_gen_ext8u_i64(reg, reg);
2721 break;
2722 case 1:
2723 tcg_gen_ext16u_i64(reg, reg);
2724 break;
2725 case 2:
2726 tcg_gen_ext32u_i64(reg, reg);
2727 break;
2728 case 3:
2729 break;
2730 default:
2731 g_assert_not_reached();
2734 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2735 return true;
2738 TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
2739 TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
2741 /* Compute LAST for a scalar. */
2742 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2743 int pg, int rm, bool before)
2745 TCGv_i32 last = tcg_temp_new_i32();
2746 TCGv_i64 ret;
2748 find_last_active(s, last, esz, pg);
2749 if (before) {
2750 wrap_last_active(s, last, esz);
2751 } else {
2752 incr_last_active(s, last, esz);
2755 ret = load_last_active(s, last, rm, esz);
2756 tcg_temp_free_i32(last);
2757 return ret;
2760 /* Compute LAST for a Vreg. */
2761 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2763 if (sve_access_check(s)) {
2764 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2765 write_fp_dreg(s, a->rd, val);
2766 tcg_temp_free_i64(val);
2768 return true;
2771 TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
2772 TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
2774 /* Compute LAST for a Xreg. */
2775 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2777 if (sve_access_check(s)) {
2778 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2779 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2780 tcg_temp_free_i64(val);
2782 return true;
2785 TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
2786 TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
2788 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2790 if (sve_access_check(s)) {
2791 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2793 return true;
2796 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2798 if (sve_access_check(s)) {
2799 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2800 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2801 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2802 tcg_temp_free_i64(t);
2804 return true;
2807 static gen_helper_gvec_3 * const revb_fns[4] = {
2808 NULL, gen_helper_sve_revb_h,
2809 gen_helper_sve_revb_s, gen_helper_sve_revb_d,
2811 TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
2813 static gen_helper_gvec_3 * const revh_fns[4] = {
2814 NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
2816 TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
2818 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
2819 a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
2821 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
2822 gen_helper_sve_splice, a, a->esz)
2824 TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
2825 a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
/*
 *** SVE Integer Compare - Vectors Group
 */
2831 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2832 gen_helper_gvec_flags_4 *gen_fn)
2834 TCGv_ptr pd, zn, zm, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2838 if (gen_fn == NULL) {
2839 return false;
2841 if (!sve_access_check(s)) {
2842 return true;
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_temp_new_i32();
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 zm = tcg_temp_new_ptr();
2850 pg = tcg_temp_new_ptr();
2852 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2853 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2854 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2855 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2857 gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
2859 tcg_temp_free_ptr(pd);
2860 tcg_temp_free_ptr(zn);
2861 tcg_temp_free_ptr(zm);
2862 tcg_temp_free_ptr(pg);
2864 do_pred_flags(t);
2866 tcg_temp_free_i32(t);
2867 return true;
2870 #define DO_PPZZ(NAME, name) \
2871 static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
2872 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2873 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2874 }; \
2875 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
2876 a, name##_ppzz_fns[a->esz])
2878 DO_PPZZ(CMPEQ, cmpeq)
2879 DO_PPZZ(CMPNE, cmpne)
2880 DO_PPZZ(CMPGT, cmpgt)
2881 DO_PPZZ(CMPGE, cmpge)
2882 DO_PPZZ(CMPHI, cmphi)
2883 DO_PPZZ(CMPHS, cmphs)
2885 #undef DO_PPZZ
2887 #define DO_PPZW(NAME, name) \
2888 static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
2889 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2890 gen_helper_sve_##name##_ppzw_s, NULL \
2891 }; \
2892 TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
2893 a, name##_ppzw_fns[a->esz])
2895 DO_PPZW(CMPEQ, cmpeq)
2896 DO_PPZW(CMPNE, cmpne)
2897 DO_PPZW(CMPGT, cmpgt)
2898 DO_PPZW(CMPGE, cmpge)
2899 DO_PPZW(CMPHI, cmphi)
2900 DO_PPZW(CMPHS, cmphs)
2901 DO_PPZW(CMPLT, cmplt)
2902 DO_PPZW(CMPLE, cmple)
2903 DO_PPZW(CMPLO, cmplo)
2904 DO_PPZW(CMPLS, cmpls)
2906 #undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */
2912 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2913 gen_helper_gvec_flags_3 *gen_fn)
2915 TCGv_ptr pd, zn, pg;
2916 unsigned vsz;
2917 TCGv_i32 t;
2919 if (gen_fn == NULL) {
2920 return false;
2922 if (!sve_access_check(s)) {
2923 return true;
2926 vsz = vec_full_reg_size(s);
2927 t = tcg_temp_new_i32();
2928 pd = tcg_temp_new_ptr();
2929 zn = tcg_temp_new_ptr();
2930 pg = tcg_temp_new_ptr();
2932 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2933 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2934 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2936 gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
2938 tcg_temp_free_ptr(pd);
2939 tcg_temp_free_ptr(zn);
2940 tcg_temp_free_ptr(pg);
2942 do_pred_flags(t);
2944 tcg_temp_free_i32(t);
2945 return true;
2948 #define DO_PPZI(NAME, name) \
2949 static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
2950 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2951 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2952 }; \
2953 TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
2954 name##_ppzi_fns[a->esz])
2956 DO_PPZI(CMPEQ, cmpeq)
2957 DO_PPZI(CMPNE, cmpne)
2958 DO_PPZI(CMPGT, cmpgt)
2959 DO_PPZI(CMPGE, cmpge)
2960 DO_PPZI(CMPHI, cmphi)
2961 DO_PPZI(CMPHS, cmphs)
2962 DO_PPZI(CMPLT, cmplt)
2963 DO_PPZI(CMPLE, cmple)
2964 DO_PPZI(CMPLO, cmplo)
2965 DO_PPZI(CMPLS, cmpls)
2967 #undef DO_PPZI
/*
 *** SVE Partition Break Group
 */
2973 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2974 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2976 if (!sve_access_check(s)) {
2977 return true;
2980 unsigned vsz = pred_full_reg_size(s);
2982 /* Predicate sizes may be smaller and cannot use simd_desc. */
2983 TCGv_ptr d = tcg_temp_new_ptr();
2984 TCGv_ptr n = tcg_temp_new_ptr();
2985 TCGv_ptr m = tcg_temp_new_ptr();
2986 TCGv_ptr g = tcg_temp_new_ptr();
2987 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
2989 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2990 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2991 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2992 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2994 if (a->s) {
2995 TCGv_i32 t = tcg_temp_new_i32();
2996 fn_s(t, d, n, m, g, desc);
2997 do_pred_flags(t);
2998 tcg_temp_free_i32(t);
2999 } else {
3000 fn(d, n, m, g, desc);
3002 tcg_temp_free_ptr(d);
3003 tcg_temp_free_ptr(n);
3004 tcg_temp_free_ptr(m);
3005 tcg_temp_free_ptr(g);
3006 return true;
3009 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
3010 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
3012 if (!sve_access_check(s)) {
3013 return true;
3016 unsigned vsz = pred_full_reg_size(s);
3018 /* Predicate sizes may be smaller and cannot use simd_desc. */
3019 TCGv_ptr d = tcg_temp_new_ptr();
3020 TCGv_ptr n = tcg_temp_new_ptr();
3021 TCGv_ptr g = tcg_temp_new_ptr();
3022 TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
3024 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
3025 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
3026 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
3028 if (a->s) {
3029 TCGv_i32 t = tcg_temp_new_i32();
3030 fn_s(t, d, n, g, desc);
3031 do_pred_flags(t);
3032 tcg_temp_free_i32(t);
3033 } else {
3034 fn(d, n, g, desc);
3036 tcg_temp_free_ptr(d);
3037 tcg_temp_free_ptr(n);
3038 tcg_temp_free_ptr(g);
3039 return true;
3042 TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
3043 gen_helper_sve_brkpa, gen_helper_sve_brkpas)
3044 TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
3045 gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
3047 TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
3048 gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
3049 TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
3050 gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
3052 TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
3053 gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
3054 TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
3055 gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
3057 TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
3058 gen_helper_sve_brkn, gen_helper_sve_brkns)
/*
 *** SVE Predicate Count Group
 */
3064 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3066 unsigned psz = pred_full_reg_size(s);
3068 if (psz <= 8) {
3069 uint64_t psz_mask;
3071 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3072 if (pn != pg) {
3073 TCGv_i64 g = tcg_temp_new_i64();
3074 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3075 tcg_gen_and_i64(val, val, g);
3076 tcg_temp_free_i64(g);
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
3082 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3083 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3085 tcg_gen_ctpop_i64(val, val);
3086 } else {
3087 TCGv_ptr t_pn = tcg_temp_new_ptr();
3088 TCGv_ptr t_pg = tcg_temp_new_ptr();
3089 unsigned desc = 0;
3091 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
3092 desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
3094 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3095 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3097 gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
3098 tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}
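/*
 * For example, with a 128-bit vector the predicate is only 2 bytes,
 * so the fast path applies: the whole predicate is loaded as a
 * single 64-bit value, masked down to the flag bit of each element
 * (every bit for bytes, every second bit for halfwords, and so on),
 * and counted with one ctpop.
 */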
3103 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3105 if (sve_access_check(s)) {
3106 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3108 return true;
3111 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3113 if (sve_access_check(s)) {
3114 TCGv_i64 reg = cpu_reg(s, a->rd);
3115 TCGv_i64 val = tcg_temp_new_i64();
3117 do_cntp(s, val, a->esz, a->pg, a->pg);
3118 if (a->d) {
3119 tcg_gen_sub_i64(reg, reg, val);
3120 } else {
3121 tcg_gen_add_i64(reg, reg, val);
3123 tcg_temp_free_i64(val);
3125 return true;
3128 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3130 if (a->esz == 0) {
3131 return false;
3133 if (sve_access_check(s)) {
3134 unsigned vsz = vec_full_reg_size(s);
3135 TCGv_i64 val = tcg_temp_new_i64();
3136 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3138 do_cntp(s, val, a->esz, a->pg, a->pg);
3139 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3140 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3142 return true;
3145 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3147 if (sve_access_check(s)) {
3148 TCGv_i64 reg = cpu_reg(s, a->rd);
3149 TCGv_i64 val = tcg_temp_new_i64();
3151 do_cntp(s, val, a->esz, a->pg, a->pg);
3152 do_sat_addsub_32(reg, val, a->u, a->d);
3154 return true;
3157 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3159 if (sve_access_check(s)) {
3160 TCGv_i64 reg = cpu_reg(s, a->rd);
3161 TCGv_i64 val = tcg_temp_new_i64();
3163 do_cntp(s, val, a->esz, a->pg, a->pg);
3164 do_sat_addsub_64(reg, val, a->u, a->d);
3166 return true;
3169 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3171 if (a->esz == 0) {
3172 return false;
3174 if (sve_access_check(s)) {
3175 TCGv_i64 val = tcg_temp_new_i64();
3176 do_cntp(s, val, a->esz, a->pg, a->pg);
3177 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    return true;
}

/*
 *** SVE Integer Compare Scalars Group
 */
3186 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3188 if (!sve_access_check(s)) {
3189 return true;
3192 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3193 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3194 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3195 TCGv_i64 cmp = tcg_temp_new_i64();
3197 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3198 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3199 tcg_temp_free_i64(cmp);
3201 /* VF = !NF & !CF. */
3202 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3203 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3205 /* Both NF and VF actually look at bit 31. */
3206 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3207 tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
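/*
 * C and Z are left unchanged here, typically as set by a preceding
 * WHILE; encoding V = !N & !C means a single subsequent conditional
 * branch can decide whether the serialized loop should continue.
 */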
3211 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3213 TCGv_i64 op0, op1, t0, t1, tmax;
3214 TCGv_i32 t2;
3215 TCGv_ptr ptr;
3216 unsigned vsz = vec_full_reg_size(s);
3217 unsigned desc = 0;
3218 TCGCond cond;
3219 uint64_t maxval;
3220 /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
3221 bool eq = a->eq == a->lt;
3223 /* The greater-than conditions are all SVE2. */
3224 if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
3225 return false;
3227 if (!sve_access_check(s)) {
3228 return true;
3231 op0 = read_cpu_reg(s, a->rn, 1);
3232 op1 = read_cpu_reg(s, a->rm, 1);
3234 if (!a->sf) {
3235 if (a->u) {
3236 tcg_gen_ext32u_i64(op0, op0);
3237 tcg_gen_ext32u_i64(op1, op1);
3238 } else {
3239 tcg_gen_ext32s_i64(op0, op0);
3240 tcg_gen_ext32s_i64(op1, op1);
    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
3247 t0 = tcg_temp_new_i64();
3248 t1 = tcg_temp_new_i64();
3250 if (a->lt) {
3251 tcg_gen_sub_i64(t0, op1, op0);
3252 if (a->u) {
3253 maxval = a->sf ? UINT64_MAX : UINT32_MAX;
3254 cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
3255 } else {
3256 maxval = a->sf ? INT64_MAX : INT32_MAX;
3257 cond = eq ? TCG_COND_LE : TCG_COND_LT;
3259 } else {
3260 tcg_gen_sub_i64(t0, op0, op1);
3261 if (a->u) {
3262 maxval = 0;
3263 cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
3264 } else {
3265 maxval = a->sf ? INT64_MIN : INT32_MIN;
3266 cond = eq ? TCG_COND_GE : TCG_COND_GT;
3270 tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length. This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }
3288 /* Bound to the maximum. */
3289 tcg_gen_umin_i64(t0, t0, tmax);
3291 /* Set the count to zero if the condition is false. */
3292 tcg_gen_movi_i64(t1, 0);
3293 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3294 tcg_temp_free_i64(t1);
3296 /* Since we're bounded, pass as a 32-bit type. */
3297 t2 = tcg_temp_new_i32();
3298 tcg_gen_extrl_i64_i32(t2, t0);
3299 tcg_temp_free_i64(t0);
3301 /* Scale elements to bits. */
3302 tcg_gen_shli_i32(t2, t2, a->esz);
3304 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3305 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3307 ptr = tcg_temp_new_ptr();
3308 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3310 if (a->lt) {
3311 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3312 } else {
3313 gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
3315 do_pred_flags(t2);
3317 tcg_temp_free_ptr(ptr);
3318 tcg_temp_free_i32(t2);
    return true;
}
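/*
 * As a worked example: WHILELT with 32-bit elements, op0 = 5 and
 * op1 = 8 on a 256-bit vector gives t0 = 3 and tmax = 8, so the
 * helper sets the first three of eight predicate elements and the
 * flags reflect a partially-true predicate.
 */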
3322 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3324 TCGv_i64 op0, op1, diff, t1, tmax;
3325 TCGv_i32 t2;
3326 TCGv_ptr ptr;
3327 unsigned vsz = vec_full_reg_size(s);
3328 unsigned desc = 0;
3330 if (!dc_isar_feature(aa64_sve2, s)) {
3331 return false;
3333 if (!sve_access_check(s)) {
3334 return true;
3337 op0 = read_cpu_reg(s, a->rn, 1);
3338 op1 = read_cpu_reg(s, a->rm, 1);
3340 tmax = tcg_constant_i64(vsz);
3341 diff = tcg_temp_new_i64();
3343 if (a->rw) {
3344 /* WHILERW */
3345 /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3346 t1 = tcg_temp_new_i64();
3347 tcg_gen_sub_i64(diff, op0, op1);
3348 tcg_gen_sub_i64(t1, op1, op0);
3349 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3350 tcg_temp_free_i64(t1);
3351 /* Round down to a multiple of ESIZE. */
3352 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3353 /* If op1 == op0, diff == 0, and the condition is always true. */
3354 tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3355 } else {
3356 /* WHILEWR */
3357 tcg_gen_sub_i64(diff, op1, op0);
3358 /* Round down to a multiple of ESIZE. */
3359 tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3360 /* If op0 >= op1, diff <= 0, the condition is always true. */
3361 tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3364 /* Bound to the maximum. */
3365 tcg_gen_umin_i64(diff, diff, tmax);
3367 /* Since we're bounded, pass as a 32-bit type. */
3368 t2 = tcg_temp_new_i32();
3369 tcg_gen_extrl_i64_i32(t2, diff);
3370 tcg_temp_free_i64(diff);
3372 desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3373 desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3375 ptr = tcg_temp_new_ptr();
3376 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3378 gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
3379 do_pred_flags(t2);
3381 tcg_temp_free_ptr(ptr);
3382 tcg_temp_free_i32(t2);
    return true;
}
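/*
 * E.g. WHILEWR with byte elements and op1 - op0 = 4 yields diff = 4,
 * activating only the first four elements, which limits the vector
 * length to the distance between the two pointers so a combined
 * store/load loop stays clear of the overlap hazard.
 */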
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */
3390 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3392 if (a->esz == 0) {
3393 return false;
3395 if (sve_access_check(s)) {
3396 unsigned vsz = vec_full_reg_size(s);
3397 int dofs = vec_full_reg_offset(s, a->rd);
3398 uint64_t imm;
3400 /* Decode the VFP immediate. */
3401 imm = vfp_expand_imm(a->esz, a->imm);
3402 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3404 return true;
3407 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3409 if (sve_access_check(s)) {
3410 unsigned vsz = vec_full_reg_size(s);
3411 int dofs = vec_full_reg_offset(s, a->rd);
3412 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3414 return true;
3417 TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
3419 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3421 a->imm = -a->imm;
3422 return trans_ADD_zzi(s, a);
3425 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3427 static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3428 static const GVecGen2s op[4] = {
3429 { .fni8 = tcg_gen_vec_sub8_i64,
3430 .fniv = tcg_gen_sub_vec,
3431 .fno = gen_helper_sve_subri_b,
3432 .opt_opc = vecop_list,
3433 .vece = MO_8,
3434 .scalar_first = true },
3435 { .fni8 = tcg_gen_vec_sub16_i64,
3436 .fniv = tcg_gen_sub_vec,
3437 .fno = gen_helper_sve_subri_h,
3438 .opt_opc = vecop_list,
3439 .vece = MO_16,
3440 .scalar_first = true },
3441 { .fni4 = tcg_gen_sub_i32,
3442 .fniv = tcg_gen_sub_vec,
3443 .fno = gen_helper_sve_subri_s,
3444 .opt_opc = vecop_list,
3445 .vece = MO_32,
3446 .scalar_first = true },
3447 { .fni8 = tcg_gen_sub_i64,
3448 .fniv = tcg_gen_sub_vec,
3449 .fno = gen_helper_sve_subri_d,
3450 .opt_opc = vecop_list,
3451 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3452 .vece = MO_64,
3453 .scalar_first = true }
3456 if (sve_access_check(s)) {
3457 unsigned vsz = vec_full_reg_size(s);
3458 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3459 vec_full_reg_offset(s, a->rn),
3460 vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    return true;
}
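/*
 * SUBR computes imm - Zn rather than Zn - imm; the .scalar_first
 * flag in the GVecGen2s entries above is what selects the reversed
 * operand order, with the subri helpers as the out-of-line fallback.
 */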
3465 TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
3467 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3469 if (sve_access_check(s)) {
3470 do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
3471 tcg_constant_i64(a->imm), u, d);
3473 return true;
3476 TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
3477 TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
3478 TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
3479 TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
3481 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3483 if (sve_access_check(s)) {
3484 unsigned vsz = vec_full_reg_size(s);
3485 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3486 vec_full_reg_offset(s, a->rn),
3487 tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
3489 return true;
3492 #define DO_ZZI(NAME, name) \
3493 static gen_helper_gvec_2i * const name##i_fns[4] = { \
3494 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3495 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3496 }; \
3497 TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
3499 DO_ZZI(SMAX, smax)
3500 DO_ZZI(UMAX, umax)
3501 DO_ZZI(SMIN, smin)
3502 DO_ZZI(UMIN, umin)
3504 #undef DO_ZZI
3506 static gen_helper_gvec_4 * const dot_fns[2][2] = {
3507 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3508 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3510 TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
3511 dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
/*
 * SVE Multiply - Indexed
 */
3517 TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3518 gen_helper_gvec_sdot_idx_b, a)
3519 TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3520 gen_helper_gvec_sdot_idx_h, a)
3521 TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
3522 gen_helper_gvec_udot_idx_b, a)
3523 TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
3524 gen_helper_gvec_udot_idx_h, a)
3526 TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3527 gen_helper_gvec_sudot_idx_b, a)
3528 TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
3529 gen_helper_gvec_usdot_idx_b, a)
3531 #define DO_SVE2_RRX(NAME, FUNC) \
3532 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3533 a->rd, a->rn, a->rm, a->index)
3535 DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
3536 DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
3537 DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
3539 DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
3540 DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
3541 DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
3543 DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
3544 DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
3545 DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
3547 #undef DO_SVE2_RRX
3549 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
3550 TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
3551 a->rd, a->rn, a->rm, (a->index << 1) | TOP)
3553 DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
3554 DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
3555 DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
3556 DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
3558 DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
3559 DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
3560 DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
3561 DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
3563 DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
3564 DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
3565 DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
3566 DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
3568 #undef DO_SVE2_RRX_TB
3570 #define DO_SVE2_RRXR(NAME, FUNC) \
3571 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
3573 DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
3574 DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
3575 DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
3577 DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
3578 DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
3579 DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
3581 DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
3582 DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
3583 DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
3585 DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
3586 DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
3587 DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
3589 #undef DO_SVE2_RRXR
3591 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
3592 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3593 a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
3595 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
3596 DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
3597 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
3598 DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
3600 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
3601 DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
3602 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
3603 DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
3605 DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
3606 DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
3607 DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
3608 DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
3610 DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
3611 DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
3612 DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
3613 DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
3615 DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
3616 DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
3617 DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
3618 DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
3620 DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
3621 DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
3622 DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
3623 DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
3625 #undef DO_SVE2_RRXR_TB
3627 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \
3628 TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
3629 a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
3631 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
3632 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
3634 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
3635 DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
3637 DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
3638 DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
3640 #undef DO_SVE2_RRXR_ROT
/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */
3646 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
3648 static gen_helper_gvec_4_ptr * const fns[4] = {
3649 NULL,
3650 gen_helper_gvec_fmla_idx_h,
3651 gen_helper_gvec_fmla_idx_s,
3652 gen_helper_gvec_fmla_idx_d,
3654 return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
3655 (a->index << 1) | sub,
3656 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3659 TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
3660 TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
/*
 *** SVE Floating Point Multiply Indexed Group
 */
3666 static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
3667 NULL, gen_helper_gvec_fmul_idx_h,
3668 gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
3670 TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
3671 fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
3672 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
 *** SVE Floating Point Fast Reduction Group
 */
3678 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3679 TCGv_ptr, TCGv_i32);
3681 static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
3682 gen_helper_fp_reduce *fn)
3684 unsigned vsz, p2vsz;
3685 TCGv_i32 t_desc;
3686 TCGv_ptr t_zn, t_pg, status;
3687 TCGv_i64 temp;
3689 if (fn == NULL) {
3690 return false;
3692 if (!sve_access_check(s)) {
3693 return true;
3696 vsz = vec_full_reg_size(s);
3697 p2vsz = pow2ceil(vsz);
3698 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
3699 temp = tcg_temp_new_i64();
3700 t_zn = tcg_temp_new_ptr();
3701 t_pg = tcg_temp_new_ptr();
3703 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3704 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3705 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3707 fn(temp, t_zn, t_pg, status, t_desc);
3708 tcg_temp_free_ptr(t_zn);
3709 tcg_temp_free_ptr(t_pg);
3710 tcg_temp_free_ptr(status);
3712 write_fp_dreg(s, a->rd, temp);
3713 tcg_temp_free_i64(temp);
    return true;
}
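/*
 * The pow2ceil value passed in the descriptor data presumably lets
 * the helpers treat the vector as a power-of-two number of slots,
 * padding the inactive tail with the operation's identity so the
 * reduction can be done by simple recursive halving.
 */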
3717 #define DO_VPZ(NAME, name) \
3718 static gen_helper_fp_reduce * const name##_fns[4] = { \
3719 NULL, gen_helper_sve_##name##_h, \
3720 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
3721 }; \
3722 TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
3724 DO_VPZ(FADDV, faddv)
3725 DO_VPZ(FMINNMV, fminnmv)
3726 DO_VPZ(FMAXNMV, fmaxnmv)
3727 DO_VPZ(FMINV, fminv)
3728 DO_VPZ(FMAXV, fmaxv)
3730 #undef DO_VPZ
/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */
3736 static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
3737 NULL, gen_helper_gvec_frecpe_h,
3738 gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
3740 TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
3742 static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
3743 NULL, gen_helper_gvec_frsqrte_h,
3744 gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
3746 TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
/*
 *** SVE Floating Point Compare with Zero Group
 */
3752 static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3753 gen_helper_gvec_3_ptr *fn)
3755 if (fn == NULL) {
3756 return false;
3758 if (sve_access_check(s)) {
3759 unsigned vsz = vec_full_reg_size(s);
3760 TCGv_ptr status =
3761 fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3763 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3764 vec_full_reg_offset(s, a->rn),
3765 pred_full_reg_offset(s, a->pg),
3766 status, vsz, vsz, 0, fn);
3767 tcg_temp_free_ptr(status);
3769 return true;
3772 #define DO_PPZ(NAME, name) \
3773 static gen_helper_gvec_3_ptr * const name##_fns[] = { \
3774 NULL, gen_helper_sve_##name##_h, \
3775 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
3776 }; \
3777 TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
3779 DO_PPZ(FCMGE_ppz0, fcmge0)
3780 DO_PPZ(FCMGT_ppz0, fcmgt0)
3781 DO_PPZ(FCMLE_ppz0, fcmle0)
3782 DO_PPZ(FCMLT_ppz0, fcmlt0)
3783 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3784 DO_PPZ(FCMNE_ppz0, fcmne0)
3786 #undef DO_PPZ
/*
 *** SVE floating-point trig multiply-add coefficient
 */
3792 static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
3793 NULL, gen_helper_sve_ftmad_h,
3794 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
3796 TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
3797 ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
3798 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
 *** SVE Floating Point Accumulating Reduction Group
 */
3804 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3806 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3807 TCGv_ptr, TCGv_ptr, TCGv_i32);
3808 static fadda_fn * const fns[3] = {
3809 gen_helper_sve_fadda_h,
3810 gen_helper_sve_fadda_s,
3811 gen_helper_sve_fadda_d,
3813 unsigned vsz = vec_full_reg_size(s);
3814 TCGv_ptr t_rm, t_pg, t_fpst;
3815 TCGv_i64 t_val;
3816 TCGv_i32 t_desc;
3818 if (a->esz == 0) {
3819 return false;
3821 if (!sve_access_check(s)) {
3822 return true;
3825 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3826 t_rm = tcg_temp_new_ptr();
3827 t_pg = tcg_temp_new_ptr();
3828 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3829 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3830 t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3831 t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3833 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3835 tcg_temp_free_ptr(t_fpst);
3836 tcg_temp_free_ptr(t_pg);
3837 tcg_temp_free_ptr(t_rm);
3839 write_fp_dreg(s, a->rd, t_val);
3840 tcg_temp_free_i64(t_val);
    return true;
}

/*
 *** SVE Floating Point Arithmetic - Unpredicated Group
 */
3848 #define DO_FP3(NAME, name) \
3849 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
3850 NULL, gen_helper_gvec_##name##_h, \
3851 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3852 }; \
3853 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3855 DO_FP3(FADD_zzz, fadd)
3856 DO_FP3(FSUB_zzz, fsub)
3857 DO_FP3(FMUL_zzz, fmul)
3858 DO_FP3(FTSMUL, ftsmul)
3859 DO_FP3(FRECPS, recps)
3860 DO_FP3(FRSQRTS, rsqrts)
3862 #undef DO_FP3
/*
 *** SVE Floating Point Arithmetic - Predicated Group
 */
3868 #define DO_ZPZZ_FP(NAME, FEAT, name) \
3869 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3870 NULL, gen_helper_##name##_h, \
3871 gen_helper_##name##_s, gen_helper_##name##_d \
3872 }; \
3873 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3875 DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
3876 DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
3877 DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
3878 DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
3879 DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
3880 DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
3881 DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
3882 DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
3883 DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
3884 DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
3885 DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
3887 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3888 TCGv_i64, TCGv_ptr, TCGv_i32);
3890 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3891 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3893 unsigned vsz = vec_full_reg_size(s);
3894 TCGv_ptr t_zd, t_zn, t_pg, status;
3895 TCGv_i32 desc;
3897 t_zd = tcg_temp_new_ptr();
3898 t_zn = tcg_temp_new_ptr();
3899 t_pg = tcg_temp_new_ptr();
3900 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3901 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3902 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3904 status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
3905 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
3906 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3908 tcg_temp_free_ptr(status);
3909 tcg_temp_free_ptr(t_pg);
3910 tcg_temp_free_ptr(t_zn);
3911 tcg_temp_free_ptr(t_zd);
3914 static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3915 gen_helper_sve_fp2scalar *fn)
3917 if (fn == NULL) {
3918 return false;
3920 if (sve_access_check(s)) {
3921 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
3922 tcg_constant_i64(imm), fn);
3924 return true;
3927 #define DO_FP_IMM(NAME, name, const0, const1) \
3928 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
3929 NULL, gen_helper_sve_##name##_h, \
3930 gen_helper_sve_##name##_s, \
3931 gen_helper_sve_##name##_d \
3932 }; \
3933 static uint64_t const name##_const[4][2] = { \
3934 { -1, -1 }, \
3935 { float16_##const0, float16_##const1 }, \
3936 { float32_##const0, float32_##const1 }, \
3937 { float64_##const0, float64_##const1 }, \
3938 }; \
3939 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
3940 name##_const[a->esz][a->imm], name##_fns[a->esz])
3942 DO_FP_IMM(FADD, fadds, half, one)
3943 DO_FP_IMM(FSUB, fsubs, half, one)
3944 DO_FP_IMM(FMUL, fmuls, half, two)
3945 DO_FP_IMM(FSUBR, fsubrs, half, one)
3946 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3947 DO_FP_IMM(FMINNM, fminnms, zero, one)
3948 DO_FP_IMM(FMAX, fmaxs, zero, one)
3949 DO_FP_IMM(FMIN, fmins, zero, one)
3951 #undef DO_FP_IMM
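/*
 * In the table above, a->imm is a one-bit selector between the two
 * architected constants of each insn, in the element's float format:
 * e.g. FADD chooses between 0.5 and 1.0, FMUL between 0.5 and 2.0.
 * The -1 entries in the esz == 0 row are never reached, since the
 * matching fns entry is NULL and do_fp_imm rejects the encoding.
 */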
3953 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3954 gen_helper_gvec_4_ptr *fn)
3956 if (fn == NULL) {
3957 return false;
3959 if (sve_access_check(s)) {
3960 unsigned vsz = vec_full_reg_size(s);
3961 TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
3962 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3963 vec_full_reg_offset(s, a->rn),
3964 vec_full_reg_offset(s, a->rm),
3965 pred_full_reg_offset(s, a->pg),
3966 status, vsz, vsz, 0, fn);
3967 tcg_temp_free_ptr(status);
3969 return true;
3972 #define DO_FPCMP(NAME, name) \
3973 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
3974 NULL, gen_helper_sve_##name##_h, \
3975 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3976 }; \
3977 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
3979 DO_FPCMP(FCMGE, fcmge)
3980 DO_FPCMP(FCMGT, fcmgt)
3981 DO_FPCMP(FCMEQ, fcmeq)
3982 DO_FPCMP(FCMNE, fcmne)
3983 DO_FPCMP(FCMUO, fcmuo)
3984 DO_FPCMP(FACGE, facge)
3985 DO_FPCMP(FACGT, facgt)
3987 #undef DO_FPCMP
3989 static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
3990 NULL, gen_helper_sve_fcadd_h,
3991 gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
3993 TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
3994 a->rd, a->rn, a->rm, a->pg, a->rot,
3995 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
3997 #define DO_FMLA(NAME, name) \
3998 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
3999 NULL, gen_helper_sve_##name##_h, \
4000 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4001 }; \
4002 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
4003 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
4004 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4006 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4007 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4008 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4009 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4011 #undef DO_FMLA
4013 static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
4014 NULL, gen_helper_sve_fcmla_zpzzz_h,
4015 gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
4017 TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
4018 a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
4019 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4021 static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
4022 NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
4024 TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
4025 a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
4026 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
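/*
 * Added illustration (an assumption about the helper-side contract, not
 * code from this file): the translator packs the rotation into the low
 * two bits of the desc data via "a->index * 4 + a->rot" above, so a
 * receiving helper could unpack both fields as below.  The sketch_*
 * name is invented.
 */
static inline void sketch_unpack_fcmla_idx(int data, int *index, int *rot)
{
    *rot = data & 3;    /* FCMLA rotation selector, 0..3 */
    *index = data >> 2; /* remaining bits carry the element index */
}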
4029 /* *** SVE Floating Point Unary Operations Predicated Group */
4032 TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4033 gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
4034 TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4035 gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
4037 TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
4038 gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
4040 TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4041 gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
4042 TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4043 gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
4044 TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4045 gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
4046 TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4047 gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
4049 TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4050 gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
4051 TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4052 gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
4053 TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4054 gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
4055 TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
4056 gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
4057 TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4058 gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
4059 TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
4060 gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
4062 TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4063 gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
4064 TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4065 gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
4066 TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4067 gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
4068 TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4069 gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
4070 TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4071 gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
4072 TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4073 gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
4075 TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4076 gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
4077 TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4078 gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
4080 static gen_helper_gvec_3_ptr * const frint_fns[] = {
4081 NULL,
4082 gen_helper_sve_frint_h,
4083 gen_helper_sve_frint_s,
4084 gen_helper_sve_frint_d
4086 TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
4087 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4089 static gen_helper_gvec_3_ptr * const frintx_fns[] = {
4090 NULL,
4091 gen_helper_sve_frintx_h,
4092 gen_helper_sve_frintx_s,
4093 gen_helper_sve_frintx_d
4095 TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
4096 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4098 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
4099 int mode, gen_helper_gvec_3_ptr *fn)
4101 unsigned vsz;
4102 TCGv_i32 tmode;
4103 TCGv_ptr status;
4105 if (fn == NULL) {
4106 return false;
4108 if (!sve_access_check(s)) {
4109 return true;
4112 vsz = vec_full_reg_size(s);
4113 tmode = tcg_const_i32(mode);
4114 status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
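    /* gen_helper_set_rmode swaps rounding modes: it installs the mode
     * held in tmode and writes the previous mode back into tmode, so
     * repeating the identical call after the vector op below restores
     * the original FPCR rounding mode. */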
4116 gen_helper_set_rmode(tmode, tmode, status);
4118 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4119 vec_full_reg_offset(s, a->rn),
4120 pred_full_reg_offset(s, a->pg),
4121 status, vsz, vsz, 0, fn);
4123 gen_helper_set_rmode(tmode, tmode, status);
4124 tcg_temp_free_i32(tmode);
4125 tcg_temp_free_ptr(status);
4126 return true;
4129 TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
4130 float_round_nearest_even, frint_fns[a->esz])
4131 TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
4132 float_round_up, frint_fns[a->esz])
4133 TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
4134 float_round_down, frint_fns[a->esz])
4135 TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
4136 float_round_to_zero, frint_fns[a->esz])
4137 TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
4138 float_round_ties_away, frint_fns[a->esz])
4140 static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
4141 NULL, gen_helper_sve_frecpx_h,
4142 gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
4144 TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
4145 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4147 static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
4148 NULL, gen_helper_sve_fsqrt_h,
4149 gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
4151 TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
4152 a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4154 TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4155 gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
4156 TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4157 gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
4158 TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4159 gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
4161 TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4162 gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
4163 TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4164 gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
4166 TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4167 gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
4168 TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4169 gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
4171 TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
4172 gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
4173 TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
4174 gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
4175 TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
4176 gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
4178 TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
4179 gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
4180 TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
4181 gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
4182 TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
4183 gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
4185 TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
4186 gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
4189 /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group */
4192 /* Subroutine loading a vector register at VOFS of LEN bytes.
4193 * The load should begin at the address Rn + IMM. */
4196 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4198 int len_align = QEMU_ALIGN_DOWN(len, 8);
4199 int len_remain = len % 8;
4200 int nparts = len / 8 + ctpop8(len_remain);
4201 int midx = get_mem_index(s);
4202 TCGv_i64 dirty_addr, clean_addr, t0, t1;
4204 dirty_addr = tcg_temp_new_i64();
4205 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4206 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
4207 tcg_temp_free_i64(dirty_addr);
4210 /* Note that unpredicated load/store of vector/predicate registers
4211 * are defined as a stream of bytes, which equates to little-endian
4212 * operations on larger quantities.
4213 * Attempt to keep code expansion to a minimum by limiting the
4214 * amount of unrolling done. */
4216 if (nparts <= 4) {
4217 int i;
4219 t0 = tcg_temp_new_i64();
4220 for (i = 0; i < len_align; i += 8) {
4221 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
4222 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4223 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4225 tcg_temp_free_i64(t0);
4226 } else {
4227 TCGLabel *loop = gen_new_label();
4228 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4230 /* Copy the clean address into a local temp, live across the loop. */
4231 t0 = clean_addr;
4232 clean_addr = new_tmp_a64_local(s);
4233 tcg_gen_mov_i64(clean_addr, t0);
4235 gen_set_label(loop);
4237 t0 = tcg_temp_new_i64();
4238 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
4239 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4241 tp = tcg_temp_new_ptr();
4242 tcg_gen_add_ptr(tp, cpu_env, i);
4243 tcg_gen_addi_ptr(i, i, 8);
4244 tcg_gen_st_i64(t0, tp, vofs);
4245 tcg_temp_free_ptr(tp);
4246 tcg_temp_free_i64(t0);
4248 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4249 tcg_temp_free_ptr(i);
4253 /* Predicate register loads can be any multiple of 2.
4254 * Note that we still store the entire 64-bit unit into cpu_env. */
4256 if (len_remain) {
4257 t0 = tcg_temp_new_i64();
4258 switch (len_remain) {
4259 case 2:
4260 case 4:
4261 case 8:
4262 tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4263 MO_LE | ctz32(len_remain));
4264 break;
4266 case 6:
4267 t1 = tcg_temp_new_i64();
4268 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4269 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4270 tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4271 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4272 tcg_temp_free_i64(t1);
4273 break;
4275 default:
4276 g_assert_not_reached();
4278 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4279 tcg_temp_free_i64(t0);
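/*
 * Added illustration (not in the original): the length decomposition
 * shared by do_ldr and do_str.  E.g. len = 34 splits into four aligned
 * 8-byte parts plus a 2-byte tail, so nparts = 5 and the unrolled path
 * is skipped.  The sketch_* name is invented.
 */
static inline int sketch_ldr_nparts(int len)
{
    /* One part per whole 8-byte unit, plus one per set bit of the
     * remainder: a 6-byte tail is done as a 4-byte plus a 2-byte
     * access, matching the len_remain == 6 case handled above. */
    return len / 8 + ctpop8(len % 8);
}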
4283 /* Similarly for stores. */
4284 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4286 int len_align = QEMU_ALIGN_DOWN(len, 8);
4287 int len_remain = len % 8;
4288 int nparts = len / 8 + ctpop8(len_remain);
4289 int midx = get_mem_index(s);
4290 TCGv_i64 dirty_addr, clean_addr, t0;
4292 dirty_addr = tcg_temp_new_i64();
4293 tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4294 clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
4295 tcg_temp_free_i64(dirty_addr);
4297 /* Note that unpredicated load/store of vector/predicate registers
4298 * are defined as a stream of bytes, which equates to little-endian
4299 * operations on larger quantities. There is no nice way to force
4300 * a little-endian store for aarch64_be-linux-user out of line.
4302 * Attempt to keep code expansion to a minimum by limiting the
4303 * amount of unrolling done. */
4305 if (nparts <= 4) {
4306 int i;
4308 t0 = tcg_temp_new_i64();
4309 for (i = 0; i < len_align; i += 8) {
4310 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4311 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
4312 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4314 tcg_temp_free_i64(t0);
4315 } else {
4316 TCGLabel *loop = gen_new_label();
4317 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4319 /* Copy the clean address into a local temp, live across the loop. */
4320 t0 = clean_addr;
4321 clean_addr = new_tmp_a64_local(s);
4322 tcg_gen_mov_i64(clean_addr, t0);
4324 gen_set_label(loop);
4326 t0 = tcg_temp_new_i64();
4327 tp = tcg_temp_new_ptr();
4328 tcg_gen_add_ptr(tp, cpu_env, i);
4329 tcg_gen_ld_i64(t0, tp, vofs);
4330 tcg_gen_addi_ptr(i, i, 8);
4331 tcg_temp_free_ptr(tp);
4333 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
4334 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4335 tcg_temp_free_i64(t0);
4337 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4338 tcg_temp_free_ptr(i);
4341 /* Predicate register stores can be any multiple of 2. */
4342 if (len_remain) {
4343 t0 = tcg_temp_new_i64();
4344 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4346 switch (len_remain) {
4347 case 2:
4348 case 4:
4349 case 8:
4350 tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4351 MO_LE | ctz32(len_remain));
4352 break;
4354 case 6:
4355 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4356 tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4357 tcg_gen_shri_i64(t0, t0, 32);
4358 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4359 break;
4361 default:
4362 g_assert_not_reached();
4364 tcg_temp_free_i64(t0);
4368 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4370 if (sve_access_check(s)) {
4371 int size = vec_full_reg_size(s);
4372 int off = vec_full_reg_offset(s, a->rd);
4373 do_ldr(s, off, size, a->rn, a->imm * size);
4375 return true;
4378 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4380 if (sve_access_check(s)) {
4381 int size = pred_full_reg_size(s);
4382 int off = pred_full_reg_offset(s, a->rd);
4383 do_ldr(s, off, size, a->rn, a->imm * size);
4385 return true;
4388 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4390 if (sve_access_check(s)) {
4391 int size = vec_full_reg_size(s);
4392 int off = vec_full_reg_offset(s, a->rd);
4393 do_str(s, off, size, a->rn, a->imm * size);
4395 return true;
4398 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4400 if (sve_access_check(s)) {
4401 int size = pred_full_reg_size(s);
4402 int off = pred_full_reg_offset(s, a->rd);
4403 do_str(s, off, size, a->rn, a->imm * size);
4405 return true;
4409 /* *** SVE Memory - Contiguous Load Group */
4412 /* The memory mode of the dtype. */
4413 static const MemOp dtype_mop[16] = {
4414 MO_UB, MO_UB, MO_UB, MO_UB,
4415 MO_SL, MO_UW, MO_UW, MO_UW,
4416 MO_SW, MO_SW, MO_UL, MO_UL,
4417 MO_SB, MO_SB, MO_SB, MO_UQ
4420 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4422 /* The vector element size of dtype. */
4423 static const uint8_t dtype_esz[16] = {
4424 0, 1, 2, 3,
4425 3, 1, 2, 3,
4426 3, 2, 2, 3,
4427 3, 2, 1, 3
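/*
 * Added illustration (not in the original): a dtype describes both the
 * memory size and the vector element size, so an extending load is
 * simply one where the two differ -- e.g. dtype 2 (byte loaded into .S
 * elements) has msz = 0 but esz = 2.  The sketch_* name is invented.
 */
static inline bool sketch_dtype_is_extending(int dtype)
{
    return dtype_msz(dtype) != dtype_esz[dtype];
}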
4430 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4431 int dtype, uint32_t mte_n, bool is_write,
4432 gen_helper_gvec_mem *fn)
4434 unsigned vsz = vec_full_reg_size(s);
4435 TCGv_ptr t_pg;
4436 int desc = 0;
4439 /* For e.g. LD4, there are not enough arguments to pass all 4
4440 * registers as pointers, so encode the regno into the data field.
4441 * For consistency, do this even for LD1. */
4443 if (s->mte_active[0]) {
4444 int msz = dtype_msz(dtype);
4446 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4447 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4448 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4449 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4450 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
4451 desc <<= SVE_MTEDESC_SHIFT;
4452 } else {
4453 addr = clean_data_tbi(s, addr);
4456 desc = simd_desc(vsz, vsz, zt | desc);
4457 t_pg = tcg_temp_new_ptr();
4459 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4460 fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));
4462 tcg_temp_free_ptr(t_pg);
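/*
 * Added illustration (an assumption about the helper-side contract, not
 * code from this file): after the packing above, the MTE fields sit
 * above SVE_MTEDESC_SHIFT while the low bits of the simd data still
 * carry the register number, so a helper could recover zt with a
 * simple mask.  The sketch_* name is invented.
 */
static inline int sketch_desc_to_zt(uint32_t desc)
{
    return simd_data(desc) & 31;   /* zt is a 5-bit register number */
}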
4465 /* Indexed by [mte][be][dtype][nreg] */
4466 static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
4467 { /* mte inactive, little-endian */
4468 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4469 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4470 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4471 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4472 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4474 { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4475 { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4476 gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4477 { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4478 { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4480 { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4481 { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4482 { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4483 gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4484 { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4486 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4487 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4488 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4489 { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4490 gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4492 /* mte inactive, big-endian */
4493 { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4494 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4495 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4496 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4497 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4499 { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4500 { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4501 gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4502 { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4503 { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4505 { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4506 { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4507 { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4508 gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4509 { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4511 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4512 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4513 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4514 { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4515 gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4517 { /* mte active, little-endian */
4518 { { gen_helper_sve_ld1bb_r_mte,
4519 gen_helper_sve_ld2bb_r_mte,
4520 gen_helper_sve_ld3bb_r_mte,
4521 gen_helper_sve_ld4bb_r_mte },
4522 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4523 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4524 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4526 { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4527 { gen_helper_sve_ld1hh_le_r_mte,
4528 gen_helper_sve_ld2hh_le_r_mte,
4529 gen_helper_sve_ld3hh_le_r_mte,
4530 gen_helper_sve_ld4hh_le_r_mte },
4531 { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4532 { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4534 { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4535 { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4536 { gen_helper_sve_ld1ss_le_r_mte,
4537 gen_helper_sve_ld2ss_le_r_mte,
4538 gen_helper_sve_ld3ss_le_r_mte,
4539 gen_helper_sve_ld4ss_le_r_mte },
4540 { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4542 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4543 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4544 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4545 { gen_helper_sve_ld1dd_le_r_mte,
4546 gen_helper_sve_ld2dd_le_r_mte,
4547 gen_helper_sve_ld3dd_le_r_mte,
4548 gen_helper_sve_ld4dd_le_r_mte } },
4550 /* mte active, big-endian */
4551 { { gen_helper_sve_ld1bb_r_mte,
4552 gen_helper_sve_ld2bb_r_mte,
4553 gen_helper_sve_ld3bb_r_mte,
4554 gen_helper_sve_ld4bb_r_mte },
4555 { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4556 { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4557 { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4559 { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4560 { gen_helper_sve_ld1hh_be_r_mte,
4561 gen_helper_sve_ld2hh_be_r_mte,
4562 gen_helper_sve_ld3hh_be_r_mte,
4563 gen_helper_sve_ld4hh_be_r_mte },
4564 { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4565 { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4567 { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4568 { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4569 { gen_helper_sve_ld1ss_be_r_mte,
4570 gen_helper_sve_ld2ss_be_r_mte,
4571 gen_helper_sve_ld3ss_be_r_mte,
4572 gen_helper_sve_ld4ss_be_r_mte },
4573 { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4575 { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4576 { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4577 { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4578 { gen_helper_sve_ld1dd_be_r_mte,
4579 gen_helper_sve_ld2dd_be_r_mte,
4580 gen_helper_sve_ld3dd_be_r_mte,
4581 gen_helper_sve_ld4dd_be_r_mte } } },
4584 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4585 TCGv_i64 addr, int dtype, int nreg)
4587 gen_helper_gvec_mem *fn
4588 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
4591 /* While there are holes in the table, they are not
4592 * accessible via the instruction encoding. */
4594 assert(fn != NULL);
4595 do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
4598 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4600 if (a->rm == 31) {
4601 return false;
4603 if (sve_access_check(s)) {
4604 TCGv_i64 addr = new_tmp_a64(s);
4605 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4606 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4607 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4609 return true;
4612 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4614 if (sve_access_check(s)) {
4615 int vsz = vec_full_reg_size(s);
4616 int elements = vsz >> dtype_esz[a->dtype];
4617 TCGv_i64 addr = new_tmp_a64(s);
4619 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4620 (a->imm * elements * (a->nreg + 1))
4621 << dtype_msz(a->dtype));
4622 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4624 return true;
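/*
 * Added illustration (not in the original): the immediate offset
 * computed above scales by whole register groups.  With vsz = 32
 * (VL = 256 bits), LD4H (dtype 5, nreg = 3) and imm = 1 give
 * elements = 16 and an offset of (1 * 16 * 4) << 1 = 128 bytes,
 * i.e. one full group of four vectors.  The sketch_* name is invented.
 */
static inline int sketch_ld_zpri_offset(int vsz, int dtype, int imm, int nreg)
{
    int elements = vsz >> dtype_esz[dtype];
    return (imm * elements * (nreg + 1)) << dtype_msz(dtype);
}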
4627 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4629 static gen_helper_gvec_mem * const fns[2][2][16] = {
4630 { /* mte inactive, little-endian */
4631 { gen_helper_sve_ldff1bb_r,
4632 gen_helper_sve_ldff1bhu_r,
4633 gen_helper_sve_ldff1bsu_r,
4634 gen_helper_sve_ldff1bdu_r,
4636 gen_helper_sve_ldff1sds_le_r,
4637 gen_helper_sve_ldff1hh_le_r,
4638 gen_helper_sve_ldff1hsu_le_r,
4639 gen_helper_sve_ldff1hdu_le_r,
4641 gen_helper_sve_ldff1hds_le_r,
4642 gen_helper_sve_ldff1hss_le_r,
4643 gen_helper_sve_ldff1ss_le_r,
4644 gen_helper_sve_ldff1sdu_le_r,
4646 gen_helper_sve_ldff1bds_r,
4647 gen_helper_sve_ldff1bss_r,
4648 gen_helper_sve_ldff1bhs_r,
4649 gen_helper_sve_ldff1dd_le_r },
4651 /* mte inactive, big-endian */
4652 { gen_helper_sve_ldff1bb_r,
4653 gen_helper_sve_ldff1bhu_r,
4654 gen_helper_sve_ldff1bsu_r,
4655 gen_helper_sve_ldff1bdu_r,
4657 gen_helper_sve_ldff1sds_be_r,
4658 gen_helper_sve_ldff1hh_be_r,
4659 gen_helper_sve_ldff1hsu_be_r,
4660 gen_helper_sve_ldff1hdu_be_r,
4662 gen_helper_sve_ldff1hds_be_r,
4663 gen_helper_sve_ldff1hss_be_r,
4664 gen_helper_sve_ldff1ss_be_r,
4665 gen_helper_sve_ldff1sdu_be_r,
4667 gen_helper_sve_ldff1bds_r,
4668 gen_helper_sve_ldff1bss_r,
4669 gen_helper_sve_ldff1bhs_r,
4670 gen_helper_sve_ldff1dd_be_r } },
4672 { /* mte active, little-endian */
4673 { gen_helper_sve_ldff1bb_r_mte,
4674 gen_helper_sve_ldff1bhu_r_mte,
4675 gen_helper_sve_ldff1bsu_r_mte,
4676 gen_helper_sve_ldff1bdu_r_mte,
4678 gen_helper_sve_ldff1sds_le_r_mte,
4679 gen_helper_sve_ldff1hh_le_r_mte,
4680 gen_helper_sve_ldff1hsu_le_r_mte,
4681 gen_helper_sve_ldff1hdu_le_r_mte,
4683 gen_helper_sve_ldff1hds_le_r_mte,
4684 gen_helper_sve_ldff1hss_le_r_mte,
4685 gen_helper_sve_ldff1ss_le_r_mte,
4686 gen_helper_sve_ldff1sdu_le_r_mte,
4688 gen_helper_sve_ldff1bds_r_mte,
4689 gen_helper_sve_ldff1bss_r_mte,
4690 gen_helper_sve_ldff1bhs_r_mte,
4691 gen_helper_sve_ldff1dd_le_r_mte },
4693 /* mte active, big-endian */
4694 { gen_helper_sve_ldff1bb_r_mte,
4695 gen_helper_sve_ldff1bhu_r_mte,
4696 gen_helper_sve_ldff1bsu_r_mte,
4697 gen_helper_sve_ldff1bdu_r_mte,
4699 gen_helper_sve_ldff1sds_be_r_mte,
4700 gen_helper_sve_ldff1hh_be_r_mte,
4701 gen_helper_sve_ldff1hsu_be_r_mte,
4702 gen_helper_sve_ldff1hdu_be_r_mte,
4704 gen_helper_sve_ldff1hds_be_r_mte,
4705 gen_helper_sve_ldff1hss_be_r_mte,
4706 gen_helper_sve_ldff1ss_be_r_mte,
4707 gen_helper_sve_ldff1sdu_be_r_mte,
4709 gen_helper_sve_ldff1bds_r_mte,
4710 gen_helper_sve_ldff1bss_r_mte,
4711 gen_helper_sve_ldff1bhs_r_mte,
4712 gen_helper_sve_ldff1dd_be_r_mte } },
4715 if (sve_access_check(s)) {
4716 TCGv_i64 addr = new_tmp_a64(s);
4717 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4718 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4719 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4720 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4722 return true;
4725 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4727 static gen_helper_gvec_mem * const fns[2][2][16] = {
4728 { /* mte inactive, little-endian */
4729 { gen_helper_sve_ldnf1bb_r,
4730 gen_helper_sve_ldnf1bhu_r,
4731 gen_helper_sve_ldnf1bsu_r,
4732 gen_helper_sve_ldnf1bdu_r,
4734 gen_helper_sve_ldnf1sds_le_r,
4735 gen_helper_sve_ldnf1hh_le_r,
4736 gen_helper_sve_ldnf1hsu_le_r,
4737 gen_helper_sve_ldnf1hdu_le_r,
4739 gen_helper_sve_ldnf1hds_le_r,
4740 gen_helper_sve_ldnf1hss_le_r,
4741 gen_helper_sve_ldnf1ss_le_r,
4742 gen_helper_sve_ldnf1sdu_le_r,
4744 gen_helper_sve_ldnf1bds_r,
4745 gen_helper_sve_ldnf1bss_r,
4746 gen_helper_sve_ldnf1bhs_r,
4747 gen_helper_sve_ldnf1dd_le_r },
4749 /* mte inactive, big-endian */
4750 { gen_helper_sve_ldnf1bb_r,
4751 gen_helper_sve_ldnf1bhu_r,
4752 gen_helper_sve_ldnf1bsu_r,
4753 gen_helper_sve_ldnf1bdu_r,
4755 gen_helper_sve_ldnf1sds_be_r,
4756 gen_helper_sve_ldnf1hh_be_r,
4757 gen_helper_sve_ldnf1hsu_be_r,
4758 gen_helper_sve_ldnf1hdu_be_r,
4760 gen_helper_sve_ldnf1hds_be_r,
4761 gen_helper_sve_ldnf1hss_be_r,
4762 gen_helper_sve_ldnf1ss_be_r,
4763 gen_helper_sve_ldnf1sdu_be_r,
4765 gen_helper_sve_ldnf1bds_r,
4766 gen_helper_sve_ldnf1bss_r,
4767 gen_helper_sve_ldnf1bhs_r,
4768 gen_helper_sve_ldnf1dd_be_r } },
4770 { /* mte active, little-endian */
4771 { gen_helper_sve_ldnf1bb_r_mte,
4772 gen_helper_sve_ldnf1bhu_r_mte,
4773 gen_helper_sve_ldnf1bsu_r_mte,
4774 gen_helper_sve_ldnf1bdu_r_mte,
4776 gen_helper_sve_ldnf1sds_le_r_mte,
4777 gen_helper_sve_ldnf1hh_le_r_mte,
4778 gen_helper_sve_ldnf1hsu_le_r_mte,
4779 gen_helper_sve_ldnf1hdu_le_r_mte,
4781 gen_helper_sve_ldnf1hds_le_r_mte,
4782 gen_helper_sve_ldnf1hss_le_r_mte,
4783 gen_helper_sve_ldnf1ss_le_r_mte,
4784 gen_helper_sve_ldnf1sdu_le_r_mte,
4786 gen_helper_sve_ldnf1bds_r_mte,
4787 gen_helper_sve_ldnf1bss_r_mte,
4788 gen_helper_sve_ldnf1bhs_r_mte,
4789 gen_helper_sve_ldnf1dd_le_r_mte },
4791 /* mte active, big-endian */
4792 { gen_helper_sve_ldnf1bb_r_mte,
4793 gen_helper_sve_ldnf1bhu_r_mte,
4794 gen_helper_sve_ldnf1bsu_r_mte,
4795 gen_helper_sve_ldnf1bdu_r_mte,
4797 gen_helper_sve_ldnf1sds_be_r_mte,
4798 gen_helper_sve_ldnf1hh_be_r_mte,
4799 gen_helper_sve_ldnf1hsu_be_r_mte,
4800 gen_helper_sve_ldnf1hdu_be_r_mte,
4802 gen_helper_sve_ldnf1hds_be_r_mte,
4803 gen_helper_sve_ldnf1hss_be_r_mte,
4804 gen_helper_sve_ldnf1ss_be_r_mte,
4805 gen_helper_sve_ldnf1sdu_be_r_mte,
4807 gen_helper_sve_ldnf1bds_r_mte,
4808 gen_helper_sve_ldnf1bss_r_mte,
4809 gen_helper_sve_ldnf1bhs_r_mte,
4810 gen_helper_sve_ldnf1dd_be_r_mte } },
4813 if (sve_access_check(s)) {
4814 int vsz = vec_full_reg_size(s);
4815 int elements = vsz >> dtype_esz[a->dtype];
4816 int off = (a->imm * elements) << dtype_msz(a->dtype);
4817 TCGv_i64 addr = new_tmp_a64(s);
4819 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4820 do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
4821 fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
4823 return true;
4826 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4828 unsigned vsz = vec_full_reg_size(s);
4829 TCGv_ptr t_pg;
4830 int poff;
4832 /* Load the first quadword using the normal predicated load helpers. */
4833 poff = pred_full_reg_offset(s, pg);
4834 if (vsz > 16) {
4836 /* Zero-extend the first 16 bits of the predicate into a temporary.
4837 * This avoids triggering an assert making sure we don't have bits
4838 * set within a predicate beyond VQ, but we have lowered VQ to 1
4839 * for this load operation. */
4841 TCGv_i64 tmp = tcg_temp_new_i64();
4842 #if HOST_BIG_ENDIAN
4843 poff += 6;
4844 #endif
4845 tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4847 poff = offsetof(CPUARMState, vfp.preg_tmp);
4848 tcg_gen_st_i64(tmp, cpu_env, poff);
4849 tcg_temp_free_i64(tmp);
4852 t_pg = tcg_temp_new_ptr();
4853 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4855 gen_helper_gvec_mem *fn
4856 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4857 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
4859 tcg_temp_free_ptr(t_pg);
4861 /* Replicate that first quadword. */
4862 if (vsz > 16) {
4863 int doff = vec_full_reg_offset(s, zt);
4864 tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
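/*
 * Added illustration (not in the original): the host-side equivalent of
 * the tcg_gen_gvec_dup_mem(4, ...) call above -- vece 4 means a 16-byte
 * unit, so the first quadword is copied into every following quadword
 * slot.  The sketch_* name is invented.
 */
static inline void sketch_replicate_quadword(uint8_t *vec, unsigned vsz)
{
    for (unsigned i = 16; i < vsz; i += 16) {
        memcpy(vec + i, vec, 16);
    }
}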
4868 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4870 if (a->rm == 31) {
4871 return false;
4873 if (sve_access_check(s)) {
4874 int msz = dtype_msz(a->dtype);
4875 TCGv_i64 addr = new_tmp_a64(s);
4876 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4877 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4878 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4880 return true;
4883 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4885 if (sve_access_check(s)) {
4886 TCGv_i64 addr = new_tmp_a64(s);
4887 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4888 do_ldrq(s, a->rd, a->pg, addr, a->dtype);
4890 return true;
4893 static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
4895 unsigned vsz = vec_full_reg_size(s);
4896 unsigned vsz_r32;
4897 TCGv_ptr t_pg;
4898 int poff, doff;
4900 if (vsz < 32) {
4902 /* Note that this UNDEFINED check comes after CheckSVEEnabled()
4903 * in the ARM pseudocode, which is the sve_access_check() done
4904 * in our caller. We should not now return false from the caller. */
4906 unallocated_encoding(s);
4907 return;
4910 /* Load the first octaword using the normal predicated load helpers. */
4912 poff = pred_full_reg_offset(s, pg);
4913 if (vsz > 32) {
4915 /* Zero-extend the first 32 bits of the predicate into a temporary.
4916 * This avoids triggering an assert making sure we don't have bits
4917 * set within a predicate beyond VQ, but we have lowered VQ to 2
4918 * for this load operation. */
4920 TCGv_i64 tmp = tcg_temp_new_i64();
4921 #if HOST_BIG_ENDIAN
4922 poff += 4;
4923 #endif
4924 tcg_gen_ld32u_i64(tmp, cpu_env, poff);
4926 poff = offsetof(CPUARMState, vfp.preg_tmp);
4927 tcg_gen_st_i64(tmp, cpu_env, poff);
4928 tcg_temp_free_i64(tmp);
4931 t_pg = tcg_temp_new_ptr();
4932 tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4934 gen_helper_gvec_mem *fn
4935 = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
4936 fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));
4938 tcg_temp_free_ptr(t_pg);
4941 /* Replicate that first octaword.
4942 * The replication happens in units of 32; if the full vector size
4943 * is not a multiple of 32, the final bits are zeroed. */
4945 doff = vec_full_reg_offset(s, zt);
4946 vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
4947 if (vsz >= 64) {
4948 tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
4950 vsz -= vsz_r32;
4951 if (vsz) {
4952 tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
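    /*
     * Added note: e.g. with vsz = 48, vsz_r32 = 32, so the replication
     * above is skipped (vsz < 64) and only the final 16 bytes past the
     * loaded octaword are zeroed, per the comment before this block.
     */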
4956 static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
4958 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4959 return false;
4961 if (a->rm == 31) {
4962 return false;
4964 if (sve_access_check(s)) {
4965 TCGv_i64 addr = new_tmp_a64(s);
4966 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4967 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4968 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4970 return true;
4973 static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
4975 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
4976 return false;
4978 if (sve_access_check(s)) {
4979 TCGv_i64 addr = new_tmp_a64(s);
4980 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
4981 do_ldro(s, a->rd, a->pg, addr, a->dtype);
4983 return true;
4986 /* Load and broadcast element. */
4987 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4989 unsigned vsz = vec_full_reg_size(s);
4990 unsigned psz = pred_full_reg_size(s);
4991 unsigned esz = dtype_esz[a->dtype];
4992 unsigned msz = dtype_msz(a->dtype);
4993 TCGLabel *over;
4994 TCGv_i64 temp, clean_addr;
4996 if (!sve_access_check(s)) {
4997 return true;
5000 over = gen_new_label();
5002 /* If the guarding predicate has no bits set, no load occurs. */
5003 if (psz <= 8) {
5004 /* Reduce the pred_esz_masks value simply to reduce the
5005 * size of the code generated here. */
5007 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5008 temp = tcg_temp_new_i64();
5009 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5010 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5011 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5012 tcg_temp_free_i64(temp);
5013 } else {
5014 TCGv_i32 t32 = tcg_temp_new_i32();
5015 find_last_active(s, t32, esz, a->pg);
5016 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5017 tcg_temp_free_i32(t32);
5020 /* Load the data. */
5021 temp = tcg_temp_new_i64();
5022 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5023 clean_addr = gen_mte_check1(s, temp, false, true, msz);
5025 tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
5026 finalize_memop(s, dtype_mop[a->dtype]));
5028 /* Broadcast to *all* elements. */
5029 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5030 vsz, vsz, temp);
5031 tcg_temp_free_i64(temp);
5033 /* Zero the inactive elements. */
5034 gen_set_label(over);
5035 return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
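/*
 * Added illustration (the mask values are an assumption about
 * pred_esz_masks, which is defined elsewhere): the psz <= 8 fast path
 * above reduces "is any element active?" to a single AND, because
 * predicate bits for elements of 1 << esz bytes recur at that stride.
 * The sketch_* name is invented.
 */
static inline bool sketch_any_active(uint64_t pg_bits, int esz, unsigned psz)
{
    static const uint64_t esz_masks[4] = {
        0xffffffffffffffffull,  /* B: every bit */
        0x5555555555555555ull,  /* H: every 2nd bit */
        0x1111111111111111ull,  /* S: every 4th bit */
        0x0101010101010101ull,  /* D: every 8th bit */
    };
    return (pg_bits & esz_masks[esz] & MAKE_64BIT_MASK(0, psz * 8)) != 0;
}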
5038 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5039 int msz, int esz, int nreg)
5041 static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5042 { { { gen_helper_sve_st1bb_r,
5043 gen_helper_sve_st1bh_r,
5044 gen_helper_sve_st1bs_r,
5045 gen_helper_sve_st1bd_r },
5046 { NULL,
5047 gen_helper_sve_st1hh_le_r,
5048 gen_helper_sve_st1hs_le_r,
5049 gen_helper_sve_st1hd_le_r },
5050 { NULL, NULL,
5051 gen_helper_sve_st1ss_le_r,
5052 gen_helper_sve_st1sd_le_r },
5053 { NULL, NULL, NULL,
5054 gen_helper_sve_st1dd_le_r } },
5055 { { gen_helper_sve_st1bb_r,
5056 gen_helper_sve_st1bh_r,
5057 gen_helper_sve_st1bs_r,
5058 gen_helper_sve_st1bd_r },
5059 { NULL,
5060 gen_helper_sve_st1hh_be_r,
5061 gen_helper_sve_st1hs_be_r,
5062 gen_helper_sve_st1hd_be_r },
5063 { NULL, NULL,
5064 gen_helper_sve_st1ss_be_r,
5065 gen_helper_sve_st1sd_be_r },
5066 { NULL, NULL, NULL,
5067 gen_helper_sve_st1dd_be_r } } },
5069 { { { gen_helper_sve_st1bb_r_mte,
5070 gen_helper_sve_st1bh_r_mte,
5071 gen_helper_sve_st1bs_r_mte,
5072 gen_helper_sve_st1bd_r_mte },
5073 { NULL,
5074 gen_helper_sve_st1hh_le_r_mte,
5075 gen_helper_sve_st1hs_le_r_mte,
5076 gen_helper_sve_st1hd_le_r_mte },
5077 { NULL, NULL,
5078 gen_helper_sve_st1ss_le_r_mte,
5079 gen_helper_sve_st1sd_le_r_mte },
5080 { NULL, NULL, NULL,
5081 gen_helper_sve_st1dd_le_r_mte } },
5082 { { gen_helper_sve_st1bb_r_mte,
5083 gen_helper_sve_st1bh_r_mte,
5084 gen_helper_sve_st1bs_r_mte,
5085 gen_helper_sve_st1bd_r_mte },
5086 { NULL,
5087 gen_helper_sve_st1hh_be_r_mte,
5088 gen_helper_sve_st1hs_be_r_mte,
5089 gen_helper_sve_st1hd_be_r_mte },
5090 { NULL, NULL,
5091 gen_helper_sve_st1ss_be_r_mte,
5092 gen_helper_sve_st1sd_be_r_mte },
5093 { NULL, NULL, NULL,
5094 gen_helper_sve_st1dd_be_r_mte } } },
5096 static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5097 { { { gen_helper_sve_st2bb_r,
5098 gen_helper_sve_st2hh_le_r,
5099 gen_helper_sve_st2ss_le_r,
5100 gen_helper_sve_st2dd_le_r },
5101 { gen_helper_sve_st3bb_r,
5102 gen_helper_sve_st3hh_le_r,
5103 gen_helper_sve_st3ss_le_r,
5104 gen_helper_sve_st3dd_le_r },
5105 { gen_helper_sve_st4bb_r,
5106 gen_helper_sve_st4hh_le_r,
5107 gen_helper_sve_st4ss_le_r,
5108 gen_helper_sve_st4dd_le_r } },
5109 { { gen_helper_sve_st2bb_r,
5110 gen_helper_sve_st2hh_be_r,
5111 gen_helper_sve_st2ss_be_r,
5112 gen_helper_sve_st2dd_be_r },
5113 { gen_helper_sve_st3bb_r,
5114 gen_helper_sve_st3hh_be_r,
5115 gen_helper_sve_st3ss_be_r,
5116 gen_helper_sve_st3dd_be_r },
5117 { gen_helper_sve_st4bb_r,
5118 gen_helper_sve_st4hh_be_r,
5119 gen_helper_sve_st4ss_be_r,
5120 gen_helper_sve_st4dd_be_r } } },
5121 { { { gen_helper_sve_st2bb_r_mte,
5122 gen_helper_sve_st2hh_le_r_mte,
5123 gen_helper_sve_st2ss_le_r_mte,
5124 gen_helper_sve_st2dd_le_r_mte },
5125 { gen_helper_sve_st3bb_r_mte,
5126 gen_helper_sve_st3hh_le_r_mte,
5127 gen_helper_sve_st3ss_le_r_mte,
5128 gen_helper_sve_st3dd_le_r_mte },
5129 { gen_helper_sve_st4bb_r_mte,
5130 gen_helper_sve_st4hh_le_r_mte,
5131 gen_helper_sve_st4ss_le_r_mte,
5132 gen_helper_sve_st4dd_le_r_mte } },
5133 { { gen_helper_sve_st2bb_r_mte,
5134 gen_helper_sve_st2hh_be_r_mte,
5135 gen_helper_sve_st2ss_be_r_mte,
5136 gen_helper_sve_st2dd_be_r_mte },
5137 { gen_helper_sve_st3bb_r_mte,
5138 gen_helper_sve_st3hh_be_r_mte,
5139 gen_helper_sve_st3ss_be_r_mte,
5140 gen_helper_sve_st3dd_be_r_mte },
5141 { gen_helper_sve_st4bb_r_mte,
5142 gen_helper_sve_st4hh_be_r_mte,
5143 gen_helper_sve_st4ss_be_r_mte,
5144 gen_helper_sve_st4dd_be_r_mte } } },
5146 gen_helper_gvec_mem *fn;
5147 int be = s->be_data == MO_BE;
5149 if (nreg == 0) {
5150 /* ST1 */
5151 fn = fn_single[s->mte_active[0]][be][msz][esz];
5152 nreg = 1;
5153 } else {
5154 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5155 assert(msz == esz);
5156 fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
5158 assert(fn != NULL);
5159 do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
5162 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5164 if (a->rm == 31 || a->msz > a->esz) {
5165 return false;
5167 if (sve_access_check(s)) {
5168 TCGv_i64 addr = new_tmp_a64(s);
5169 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5170 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5171 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5173 return true;
5176 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5178 if (a->msz > a->esz) {
5179 return false;
5181 if (sve_access_check(s)) {
5182 int vsz = vec_full_reg_size(s);
5183 int elements = vsz >> a->esz;
5184 TCGv_i64 addr = new_tmp_a64(s);
5186 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5187 (a->imm * elements * (a->nreg + 1)) << a->msz);
5188 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5190 return true;
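/*
 * Added illustration (not in the original): the size rule the two
 * trans_ST_* functions above enforce -- ST1 may truncate elements on
 * the way out (msz <= esz), while ST2/3/4 require msz == esz, as the
 * assert in do_st_zpa documents.  The sketch_* name is invented.
 */
static inline bool sketch_st_size_valid(int msz, int esz, int nreg)
{
    return nreg == 0 ? msz <= esz : msz == esz;
}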
5194 /* *** SVE gather loads / scatter stores */
5197 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5198 int scale, TCGv_i64 scalar, int msz, bool is_write,
5199 gen_helper_gvec_mem_scatter *fn)
5201 unsigned vsz = vec_full_reg_size(s);
5202 TCGv_ptr t_zm = tcg_temp_new_ptr();
5203 TCGv_ptr t_pg = tcg_temp_new_ptr();
5204 TCGv_ptr t_zt = tcg_temp_new_ptr();
5205 int desc = 0;
5207 if (s->mte_active[0]) {
5208 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5209 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5210 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5211 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5212 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
5213 desc <<= SVE_MTEDESC_SHIFT;
5215 desc = simd_desc(vsz, vsz, desc | scale);
5217 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5218 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5219 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5220 fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
5222 tcg_temp_free_ptr(t_zt);
5223 tcg_temp_free_ptr(t_zm);
5224 tcg_temp_free_ptr(t_pg);
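/*
 * Added illustration (an assumption about the scatter/gather helper
 * contract, not code from this file): each active element forms its
 * address from the scalar base plus its vector offset shifted by the
 * scale packed into the desc above (a->scale * a->msz in the callers).
 * The sketch_* name is invented.
 */
static inline uint64_t sketch_gather_elem_addr(uint64_t base,
                                               uint64_t offset, int scale)
{
    return base + (offset << scale);
}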
5227 /* Indexed by [mte][be][ff][xs][u][msz]. */
5228 static gen_helper_gvec_mem_scatter * const
5229 gather_load_fn32[2][2][2][2][2][3] = {
5230 { /* MTE Inactive */
5231 { /* Little-endian */
5232 { { { gen_helper_sve_ldbss_zsu,
5233 gen_helper_sve_ldhss_le_zsu,
5234 NULL, },
5235 { gen_helper_sve_ldbsu_zsu,
5236 gen_helper_sve_ldhsu_le_zsu,
5237 gen_helper_sve_ldss_le_zsu, } },
5238 { { gen_helper_sve_ldbss_zss,
5239 gen_helper_sve_ldhss_le_zss,
5240 NULL, },
5241 { gen_helper_sve_ldbsu_zss,
5242 gen_helper_sve_ldhsu_le_zss,
5243 gen_helper_sve_ldss_le_zss, } } },
5245 /* First-fault */
5246 { { { gen_helper_sve_ldffbss_zsu,
5247 gen_helper_sve_ldffhss_le_zsu,
5248 NULL, },
5249 { gen_helper_sve_ldffbsu_zsu,
5250 gen_helper_sve_ldffhsu_le_zsu,
5251 gen_helper_sve_ldffss_le_zsu, } },
5252 { { gen_helper_sve_ldffbss_zss,
5253 gen_helper_sve_ldffhss_le_zss,
5254 NULL, },
5255 { gen_helper_sve_ldffbsu_zss,
5256 gen_helper_sve_ldffhsu_le_zss,
5257 gen_helper_sve_ldffss_le_zss, } } } },
5259 { /* Big-endian */
5260 { { { gen_helper_sve_ldbss_zsu,
5261 gen_helper_sve_ldhss_be_zsu,
5262 NULL, },
5263 { gen_helper_sve_ldbsu_zsu,
5264 gen_helper_sve_ldhsu_be_zsu,
5265 gen_helper_sve_ldss_be_zsu, } },
5266 { { gen_helper_sve_ldbss_zss,
5267 gen_helper_sve_ldhss_be_zss,
5268 NULL, },
5269 { gen_helper_sve_ldbsu_zss,
5270 gen_helper_sve_ldhsu_be_zss,
5271 gen_helper_sve_ldss_be_zss, } } },
5273 /* First-fault */
5274 { { { gen_helper_sve_ldffbss_zsu,
5275 gen_helper_sve_ldffhss_be_zsu,
5276 NULL, },
5277 { gen_helper_sve_ldffbsu_zsu,
5278 gen_helper_sve_ldffhsu_be_zsu,
5279 gen_helper_sve_ldffss_be_zsu, } },
5280 { { gen_helper_sve_ldffbss_zss,
5281 gen_helper_sve_ldffhss_be_zss,
5282 NULL, },
5283 { gen_helper_sve_ldffbsu_zss,
5284 gen_helper_sve_ldffhsu_be_zss,
5285 gen_helper_sve_ldffss_be_zss, } } } } },
5286 { /* MTE Active */
5287 { /* Little-endian */
5288 { { { gen_helper_sve_ldbss_zsu_mte,
5289 gen_helper_sve_ldhss_le_zsu_mte,
5290 NULL, },
5291 { gen_helper_sve_ldbsu_zsu_mte,
5292 gen_helper_sve_ldhsu_le_zsu_mte,
5293 gen_helper_sve_ldss_le_zsu_mte, } },
5294 { { gen_helper_sve_ldbss_zss_mte,
5295 gen_helper_sve_ldhss_le_zss_mte,
5296 NULL, },
5297 { gen_helper_sve_ldbsu_zss_mte,
5298 gen_helper_sve_ldhsu_le_zss_mte,
5299 gen_helper_sve_ldss_le_zss_mte, } } },
5301 /* First-fault */
5302 { { { gen_helper_sve_ldffbss_zsu_mte,
5303 gen_helper_sve_ldffhss_le_zsu_mte,
5304 NULL, },
5305 { gen_helper_sve_ldffbsu_zsu_mte,
5306 gen_helper_sve_ldffhsu_le_zsu_mte,
5307 gen_helper_sve_ldffss_le_zsu_mte, } },
5308 { { gen_helper_sve_ldffbss_zss_mte,
5309 gen_helper_sve_ldffhss_le_zss_mte,
5310 NULL, },
5311 { gen_helper_sve_ldffbsu_zss_mte,
5312 gen_helper_sve_ldffhsu_le_zss_mte,
5313 gen_helper_sve_ldffss_le_zss_mte, } } } },
5315 { /* Big-endian */
5316 { { { gen_helper_sve_ldbss_zsu_mte,
5317 gen_helper_sve_ldhss_be_zsu_mte,
5318 NULL, },
5319 { gen_helper_sve_ldbsu_zsu_mte,
5320 gen_helper_sve_ldhsu_be_zsu_mte,
5321 gen_helper_sve_ldss_be_zsu_mte, } },
5322 { { gen_helper_sve_ldbss_zss_mte,
5323 gen_helper_sve_ldhss_be_zss_mte,
5324 NULL, },
5325 { gen_helper_sve_ldbsu_zss_mte,
5326 gen_helper_sve_ldhsu_be_zss_mte,
5327 gen_helper_sve_ldss_be_zss_mte, } } },
5329 /* First-fault */
5330 { { { gen_helper_sve_ldffbss_zsu_mte,
5331 gen_helper_sve_ldffhss_be_zsu_mte,
5332 NULL, },
5333 { gen_helper_sve_ldffbsu_zsu_mte,
5334 gen_helper_sve_ldffhsu_be_zsu_mte,
5335 gen_helper_sve_ldffss_be_zsu_mte, } },
5336 { { gen_helper_sve_ldffbss_zss_mte,
5337 gen_helper_sve_ldffhss_be_zss_mte,
5338 NULL, },
5339 { gen_helper_sve_ldffbsu_zss_mte,
5340 gen_helper_sve_ldffhsu_be_zss_mte,
5341 gen_helper_sve_ldffss_be_zss_mte, } } } } },
5344 /* Note that we overload xs=2 to indicate 64-bit offset. */
5345 static gen_helper_gvec_mem_scatter * const
5346 gather_load_fn64[2][2][2][3][2][4] = {
5347 { /* MTE Inactive */
5348 { /* Little-endian */
5349 { { { gen_helper_sve_ldbds_zsu,
5350 gen_helper_sve_ldhds_le_zsu,
5351 gen_helper_sve_ldsds_le_zsu,
5352 NULL, },
5353 { gen_helper_sve_ldbdu_zsu,
5354 gen_helper_sve_ldhdu_le_zsu,
5355 gen_helper_sve_ldsdu_le_zsu,
5356 gen_helper_sve_lddd_le_zsu, } },
5357 { { gen_helper_sve_ldbds_zss,
5358 gen_helper_sve_ldhds_le_zss,
5359 gen_helper_sve_ldsds_le_zss,
5360 NULL, },
5361 { gen_helper_sve_ldbdu_zss,
5362 gen_helper_sve_ldhdu_le_zss,
5363 gen_helper_sve_ldsdu_le_zss,
5364 gen_helper_sve_lddd_le_zss, } },
5365 { { gen_helper_sve_ldbds_zd,
5366 gen_helper_sve_ldhds_le_zd,
5367 gen_helper_sve_ldsds_le_zd,
5368 NULL, },
5369 { gen_helper_sve_ldbdu_zd,
5370 gen_helper_sve_ldhdu_le_zd,
5371 gen_helper_sve_ldsdu_le_zd,
5372 gen_helper_sve_lddd_le_zd, } } },
5374 /* First-fault */
5375 { { { gen_helper_sve_ldffbds_zsu,
5376 gen_helper_sve_ldffhds_le_zsu,
5377 gen_helper_sve_ldffsds_le_zsu,
5378 NULL, },
5379 { gen_helper_sve_ldffbdu_zsu,
5380 gen_helper_sve_ldffhdu_le_zsu,
5381 gen_helper_sve_ldffsdu_le_zsu,
5382 gen_helper_sve_ldffdd_le_zsu, } },
5383 { { gen_helper_sve_ldffbds_zss,
5384 gen_helper_sve_ldffhds_le_zss,
5385 gen_helper_sve_ldffsds_le_zss,
5386 NULL, },
5387 { gen_helper_sve_ldffbdu_zss,
5388 gen_helper_sve_ldffhdu_le_zss,
5389 gen_helper_sve_ldffsdu_le_zss,
5390 gen_helper_sve_ldffdd_le_zss, } },
5391 { { gen_helper_sve_ldffbds_zd,
5392 gen_helper_sve_ldffhds_le_zd,
5393 gen_helper_sve_ldffsds_le_zd,
5394 NULL, },
5395 { gen_helper_sve_ldffbdu_zd,
5396 gen_helper_sve_ldffhdu_le_zd,
5397 gen_helper_sve_ldffsdu_le_zd,
5398 gen_helper_sve_ldffdd_le_zd, } } } },
5399 { /* Big-endian */
5400 { { { gen_helper_sve_ldbds_zsu,
5401 gen_helper_sve_ldhds_be_zsu,
5402 gen_helper_sve_ldsds_be_zsu,
5403 NULL, },
5404 { gen_helper_sve_ldbdu_zsu,
5405 gen_helper_sve_ldhdu_be_zsu,
5406 gen_helper_sve_ldsdu_be_zsu,
5407 gen_helper_sve_lddd_be_zsu, } },
5408 { { gen_helper_sve_ldbds_zss,
5409 gen_helper_sve_ldhds_be_zss,
5410 gen_helper_sve_ldsds_be_zss,
5411 NULL, },
5412 { gen_helper_sve_ldbdu_zss,
5413 gen_helper_sve_ldhdu_be_zss,
5414 gen_helper_sve_ldsdu_be_zss,
5415 gen_helper_sve_lddd_be_zss, } },
5416 { { gen_helper_sve_ldbds_zd,
5417 gen_helper_sve_ldhds_be_zd,
5418 gen_helper_sve_ldsds_be_zd,
5419 NULL, },
5420 { gen_helper_sve_ldbdu_zd,
5421 gen_helper_sve_ldhdu_be_zd,
5422 gen_helper_sve_ldsdu_be_zd,
5423 gen_helper_sve_lddd_be_zd, } } },
5425 /* First-fault */
5426 { { { gen_helper_sve_ldffbds_zsu,
5427 gen_helper_sve_ldffhds_be_zsu,
5428 gen_helper_sve_ldffsds_be_zsu,
5429 NULL, },
5430 { gen_helper_sve_ldffbdu_zsu,
5431 gen_helper_sve_ldffhdu_be_zsu,
5432 gen_helper_sve_ldffsdu_be_zsu,
5433 gen_helper_sve_ldffdd_be_zsu, } },
5434 { { gen_helper_sve_ldffbds_zss,
5435 gen_helper_sve_ldffhds_be_zss,
5436 gen_helper_sve_ldffsds_be_zss,
5437 NULL, },
5438 { gen_helper_sve_ldffbdu_zss,
5439 gen_helper_sve_ldffhdu_be_zss,
5440 gen_helper_sve_ldffsdu_be_zss,
5441 gen_helper_sve_ldffdd_be_zss, } },
5442 { { gen_helper_sve_ldffbds_zd,
5443 gen_helper_sve_ldffhds_be_zd,
5444 gen_helper_sve_ldffsds_be_zd,
5445 NULL, },
5446 { gen_helper_sve_ldffbdu_zd,
5447 gen_helper_sve_ldffhdu_be_zd,
5448 gen_helper_sve_ldffsdu_be_zd,
5449 gen_helper_sve_ldffdd_be_zd, } } } } },
5450 { /* MTE Active */
5451 { /* Little-endian */
5452 { { { gen_helper_sve_ldbds_zsu_mte,
5453 gen_helper_sve_ldhds_le_zsu_mte,
5454 gen_helper_sve_ldsds_le_zsu_mte,
5455 NULL, },
5456 { gen_helper_sve_ldbdu_zsu_mte,
5457 gen_helper_sve_ldhdu_le_zsu_mte,
5458 gen_helper_sve_ldsdu_le_zsu_mte,
5459 gen_helper_sve_lddd_le_zsu_mte, } },
5460 { { gen_helper_sve_ldbds_zss_mte,
5461 gen_helper_sve_ldhds_le_zss_mte,
5462 gen_helper_sve_ldsds_le_zss_mte,
5463 NULL, },
5464 { gen_helper_sve_ldbdu_zss_mte,
5465 gen_helper_sve_ldhdu_le_zss_mte,
5466 gen_helper_sve_ldsdu_le_zss_mte,
5467 gen_helper_sve_lddd_le_zss_mte, } },
5468 { { gen_helper_sve_ldbds_zd_mte,
5469 gen_helper_sve_ldhds_le_zd_mte,
5470 gen_helper_sve_ldsds_le_zd_mte,
5471 NULL, },
5472 { gen_helper_sve_ldbdu_zd_mte,
5473 gen_helper_sve_ldhdu_le_zd_mte,
5474 gen_helper_sve_ldsdu_le_zd_mte,
5475 gen_helper_sve_lddd_le_zd_mte, } } },
5477 /* First-fault */
5478 { { { gen_helper_sve_ldffbds_zsu_mte,
5479 gen_helper_sve_ldffhds_le_zsu_mte,
5480 gen_helper_sve_ldffsds_le_zsu_mte,
5481 NULL, },
5482 { gen_helper_sve_ldffbdu_zsu_mte,
5483 gen_helper_sve_ldffhdu_le_zsu_mte,
5484 gen_helper_sve_ldffsdu_le_zsu_mte,
5485 gen_helper_sve_ldffdd_le_zsu_mte, } },
5486 { { gen_helper_sve_ldffbds_zss_mte,
5487 gen_helper_sve_ldffhds_le_zss_mte,
5488 gen_helper_sve_ldffsds_le_zss_mte,
5489 NULL, },
5490 { gen_helper_sve_ldffbdu_zss_mte,
5491 gen_helper_sve_ldffhdu_le_zss_mte,
5492 gen_helper_sve_ldffsdu_le_zss_mte,
5493 gen_helper_sve_ldffdd_le_zss_mte, } },
5494 { { gen_helper_sve_ldffbds_zd_mte,
5495 gen_helper_sve_ldffhds_le_zd_mte,
5496 gen_helper_sve_ldffsds_le_zd_mte,
5497 NULL, },
5498 { gen_helper_sve_ldffbdu_zd_mte,
5499 gen_helper_sve_ldffhdu_le_zd_mte,
5500 gen_helper_sve_ldffsdu_le_zd_mte,
5501 gen_helper_sve_ldffdd_le_zd_mte, } } } },
5502 { /* Big-endian */
5503 { { { gen_helper_sve_ldbds_zsu_mte,
5504 gen_helper_sve_ldhds_be_zsu_mte,
5505 gen_helper_sve_ldsds_be_zsu_mte,
5506 NULL, },
5507 { gen_helper_sve_ldbdu_zsu_mte,
5508 gen_helper_sve_ldhdu_be_zsu_mte,
5509 gen_helper_sve_ldsdu_be_zsu_mte,
5510 gen_helper_sve_lddd_be_zsu_mte, } },
5511 { { gen_helper_sve_ldbds_zss_mte,
5512 gen_helper_sve_ldhds_be_zss_mte,
5513 gen_helper_sve_ldsds_be_zss_mte,
5514 NULL, },
5515 { gen_helper_sve_ldbdu_zss_mte,
5516 gen_helper_sve_ldhdu_be_zss_mte,
5517 gen_helper_sve_ldsdu_be_zss_mte,
5518 gen_helper_sve_lddd_be_zss_mte, } },
5519 { { gen_helper_sve_ldbds_zd_mte,
5520 gen_helper_sve_ldhds_be_zd_mte,
5521 gen_helper_sve_ldsds_be_zd_mte,
5522 NULL, },
5523 { gen_helper_sve_ldbdu_zd_mte,
5524 gen_helper_sve_ldhdu_be_zd_mte,
5525 gen_helper_sve_ldsdu_be_zd_mte,
5526 gen_helper_sve_lddd_be_zd_mte, } } },
5528 /* First-fault */
5529 { { { gen_helper_sve_ldffbds_zsu_mte,
5530 gen_helper_sve_ldffhds_be_zsu_mte,
5531 gen_helper_sve_ldffsds_be_zsu_mte,
5532 NULL, },
5533 { gen_helper_sve_ldffbdu_zsu_mte,
5534 gen_helper_sve_ldffhdu_be_zsu_mte,
5535 gen_helper_sve_ldffsdu_be_zsu_mte,
5536 gen_helper_sve_ldffdd_be_zsu_mte, } },
5537 { { gen_helper_sve_ldffbds_zss_mte,
5538 gen_helper_sve_ldffhds_be_zss_mte,
5539 gen_helper_sve_ldffsds_be_zss_mte,
5540 NULL, },
5541 { gen_helper_sve_ldffbdu_zss_mte,
5542 gen_helper_sve_ldffhdu_be_zss_mte,
5543 gen_helper_sve_ldffsdu_be_zss_mte,
5544 gen_helper_sve_ldffdd_be_zss_mte, } },
5545 { { gen_helper_sve_ldffbds_zd_mte,
5546 gen_helper_sve_ldffhds_be_zd_mte,
5547 gen_helper_sve_ldffsds_be_zd_mte,
5548 NULL, },
5549 { gen_helper_sve_ldffbdu_zd_mte,
5550 gen_helper_sve_ldffhdu_be_zd_mte,
5551 gen_helper_sve_ldffsdu_be_zd_mte,
5552 gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5555 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5557 gen_helper_gvec_mem_scatter *fn = NULL;
5558 bool be = s->be_data == MO_BE;
5559 bool mte = s->mte_active[0];
5561 if (!sve_access_check(s)) {
5562 return true;
5565 switch (a->esz) {
5566 case MO_32:
5567 fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
5568 break;
5569 case MO_64:
5570 fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
5571 break;
5573 assert(fn != NULL);
5575 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5576 cpu_reg_sp(s, a->rn), a->msz, false, fn);
5577 return true;
5580 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5582 gen_helper_gvec_mem_scatter *fn = NULL;
5583 bool be = s->be_data == MO_BE;
5584 bool mte = s->mte_active[0];
5586 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5587 return false;
5589 if (!sve_access_check(s)) {
5590 return true;
5593 switch (a->esz) {
5594 case MO_32:
5595 fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
5596 break;
5597 case MO_64:
5598 fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
5599 break;
5601 assert(fn != NULL);
5603 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5604 * by loading the immediate into the scalar parameter. */
5606 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5607 tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
5608 return true;
5611 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
5613 gen_helper_gvec_mem_scatter *fn = NULL;
5614 bool be = s->be_data == MO_BE;
5615 bool mte = s->mte_active[0];
5617 if (a->esz < a->msz + !a->u) {
5618 return false;
5620 if (!dc_isar_feature(aa64_sve2, s)) {
5621 return false;
5623 if (!sve_access_check(s)) {
5624 return true;
5627 switch (a->esz) {
5628 case MO_32:
5629 fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
5630 break;
5631 case MO_64:
5632 fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
5633 break;
5635 assert(fn != NULL);
5637 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5638 cpu_reg(s, a->rm), a->msz, false, fn);
5639 return true;
5642 /* Indexed by [mte][be][xs][msz]. */
5643 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5644 { /* MTE Inactive */
5645 { /* Little-endian */
5646 { gen_helper_sve_stbs_zsu,
5647 gen_helper_sve_sths_le_zsu,
5648 gen_helper_sve_stss_le_zsu, },
5649 { gen_helper_sve_stbs_zss,
5650 gen_helper_sve_sths_le_zss,
5651 gen_helper_sve_stss_le_zss, } },
5652 { /* Big-endian */
5653 { gen_helper_sve_stbs_zsu,
5654 gen_helper_sve_sths_be_zsu,
5655 gen_helper_sve_stss_be_zsu, },
5656 { gen_helper_sve_stbs_zss,
5657 gen_helper_sve_sths_be_zss,
5658 gen_helper_sve_stss_be_zss, } } },
5659 { /* MTE Active */
5660 { /* Little-endian */
5661 { gen_helper_sve_stbs_zsu_mte,
5662 gen_helper_sve_sths_le_zsu_mte,
5663 gen_helper_sve_stss_le_zsu_mte, },
5664 { gen_helper_sve_stbs_zss_mte,
5665 gen_helper_sve_sths_le_zss_mte,
5666 gen_helper_sve_stss_le_zss_mte, } },
5667 { /* Big-endian */
5668 { gen_helper_sve_stbs_zsu_mte,
5669 gen_helper_sve_sths_be_zsu_mte,
5670 gen_helper_sve_stss_be_zsu_mte, },
5671 { gen_helper_sve_stbs_zss_mte,
5672 gen_helper_sve_sths_be_zss_mte,
5673 gen_helper_sve_stss_be_zss_mte, } } },
5676 /* Note that we overload xs=2 to indicate 64-bit offset. */
5677 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5678 { /* MTE Inactive */
5679 { /* Little-endian */
5680 { gen_helper_sve_stbd_zsu,
5681 gen_helper_sve_sthd_le_zsu,
5682 gen_helper_sve_stsd_le_zsu,
5683 gen_helper_sve_stdd_le_zsu, },
5684 { gen_helper_sve_stbd_zss,
5685 gen_helper_sve_sthd_le_zss,
5686 gen_helper_sve_stsd_le_zss,
5687 gen_helper_sve_stdd_le_zss, },
5688 { gen_helper_sve_stbd_zd,
5689 gen_helper_sve_sthd_le_zd,
5690 gen_helper_sve_stsd_le_zd,
5691 gen_helper_sve_stdd_le_zd, } },
5692 { /* Big-endian */
5693 { gen_helper_sve_stbd_zsu,
5694 gen_helper_sve_sthd_be_zsu,
5695 gen_helper_sve_stsd_be_zsu,
5696 gen_helper_sve_stdd_be_zsu, },
5697 { gen_helper_sve_stbd_zss,
5698 gen_helper_sve_sthd_be_zss,
5699 gen_helper_sve_stsd_be_zss,
5700 gen_helper_sve_stdd_be_zss, },
5701 { gen_helper_sve_stbd_zd,
5702 gen_helper_sve_sthd_be_zd,
5703 gen_helper_sve_stsd_be_zd,
5704 gen_helper_sve_stdd_be_zd, } } },
5705 { /* MTE Active */
5706 { /* Little-endian */
5707 { gen_helper_sve_stbd_zsu_mte,
5708 gen_helper_sve_sthd_le_zsu_mte,
5709 gen_helper_sve_stsd_le_zsu_mte,
5710 gen_helper_sve_stdd_le_zsu_mte, },
5711 { gen_helper_sve_stbd_zss_mte,
5712 gen_helper_sve_sthd_le_zss_mte,
5713 gen_helper_sve_stsd_le_zss_mte,
5714 gen_helper_sve_stdd_le_zss_mte, },
5715 { gen_helper_sve_stbd_zd_mte,
5716 gen_helper_sve_sthd_le_zd_mte,
5717 gen_helper_sve_stsd_le_zd_mte,
5718 gen_helper_sve_stdd_le_zd_mte, } },
5719 { /* Big-endian */
5720 { gen_helper_sve_stbd_zsu_mte,
5721 gen_helper_sve_sthd_be_zsu_mte,
5722 gen_helper_sve_stsd_be_zsu_mte,
5723 gen_helper_sve_stdd_be_zsu_mte, },
5724 { gen_helper_sve_stbd_zss_mte,
5725 gen_helper_sve_sthd_be_zss_mte,
5726 gen_helper_sve_stsd_be_zss_mte,
5727 gen_helper_sve_stdd_be_zss_mte, },
5728 { gen_helper_sve_stbd_zd_mte,
5729 gen_helper_sve_sthd_be_zd_mte,
5730 gen_helper_sve_stsd_be_zd_mte,
5731 gen_helper_sve_stdd_be_zd_mte, } } },
5734 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5736 gen_helper_gvec_mem_scatter *fn;
5737 bool be = s->be_data == MO_BE;
5738 bool mte = s->mte_active[0];
5740 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5741 return false;
5743 if (!sve_access_check(s)) {
5744 return true;
5746 switch (a->esz) {
5747 case MO_32:
5748 fn = scatter_store_fn32[mte][be][a->xs][a->msz];
5749 break;
5750 case MO_64:
5751 fn = scatter_store_fn64[mte][be][a->xs][a->msz];
5752 break;
5753 default:
5754 g_assert_not_reached();
5756 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5757 cpu_reg_sp(s, a->rn), a->msz, true, fn);
5758 return true;
5761 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5763 gen_helper_gvec_mem_scatter *fn = NULL;
5764 bool be = s->be_data == MO_BE;
5765 bool mte = s->mte_active[0];
5767 if (a->esz < a->msz) {
5768 return false;
5770 if (!sve_access_check(s)) {
5771 return true;
5774 switch (a->esz) {
5775 case MO_32:
5776 fn = scatter_store_fn32[mte][be][0][a->msz];
5777 break;
5778 case MO_64:
5779 fn = scatter_store_fn64[mte][be][2][a->msz];
5780 break;
5782 assert(fn != NULL);
5784 /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5785 * by loading the immediate into the scalar parameter. */
5787 do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
5788 tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
5789 return true;
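/*
 * Editor's note: a worked example of the immediate trick above.  For
 * ST1D { z0.d }, p0, [z1.d, #16] we have msz == 3 and a decoded imm of 2
 * (the architectural immediate is a multiple of the memory element size),
 * so the "scalar base" handed to do_mem_zpz is tcg_constant_i64(16).
 */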
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)

/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL, gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL, gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)

/*
 * SVE2 Widening Integer Arithmetic
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL, gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL, gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL, gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL, gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL, gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL, gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL, gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL, gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL, gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)

static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL, gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)

static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL, gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL, gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL, gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL, gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
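/*
 * Editor's note on the SSHLL/USHLL expansions below: do_shll_tb packs
 * its gvec operand as (a->imm << 1) | sel, so bit 0 selects the top
 * (sel=1) or bottom (sel=0) narrow elements and the remaining bits are
 * the left-shift count.  With halfbits being the narrow element width,
 * e.g. SSHLLB .H, .B, #2 (vece=MO_16, halfbits=8) shifts each even
 * byte left by 8, then arithmetic-shifts right by 8 - 2 = 6, producing
 * the sign-extended byte shifted left by 2 in each 16-bit lane.
 */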
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}

static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
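/*
 * Editor's note: the three fixed-size wrappers above exist because
 * GVecGen2i.fni8 expects a (TCGv_i64, TCGv_i64, int64_t) callback with
 * no vece argument, so the element size must be baked in per table row.
 */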
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 }
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)

static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
           bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL, gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL, gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
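/*
 * Editor's note: concretely, do_adcl treats esz bit 0 as 32- vs 64-bit
 * and esz bit 1 as add vs subtract, so esz=3 with sel=true picks
 * gen_helper_sve2_adcl_d with data (2 | 1) = 3, i.e. SBCLT on .D.
 */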
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)

static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
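/*
 * Editor's note: with vece=MO_16 (halfbits=8) the sequence above clamps
 * each 16-bit lane to [-128, 127] via smax/smin and then masks to the
 * low 8 bits, which is exactly SQXTNB's saturate-and-narrow into the
 * even byte positions, with the odd bytes zeroed.
 */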
static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)

static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)

static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}
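/*
 * Editor's note: the assert above holds because the immediate decode
 * for these narrowing right shifts produces shift counts in
 * [1, 8 << esz], i.e. 1 up to the width of the narrowed element, so
 * a->imm can never be zero or exceed the element width here.
 */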
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)

static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
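/*
 * Editor's note: the 64-bit case above needs no mask gymnastics; with a
 * single 64-bit lane per i64, depositing the shifted value into bits
 * [63:32] directly implements "insert the narrowed result into the odd
 * (top) 32-bit elements".
 */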
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)

static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)

static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)

#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL, gen_helper_sve2_##name##_h,                                 \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)
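/*
 * Editor's note: for reference, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
 * expands to an addhnb_fns[4] table of { NULL, gen_helper_sve2_addhnb_h,
 * gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d } plus
 * TRANS_FEAT(ADDHNB, aa64_sve2, gen_gvec_ool_arg_zzz,
 *            addhnb_fns[a->esz], a, 0).
 */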
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
           histcnt_fns[a->esz], a, 0)

TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
           a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)

/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
           a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL, gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)

TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
           gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, false)
TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_aese, a, true)

TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
           gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)

TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL, gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
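/*
 * Editor's note: the helper's data operand packs the flags as
 * (sel << 1) | sub, so e.g. FMLSLT_zzzw passes 3: bit 0 negates the
 * product, bit 1 selects the top (odd) half-precision elements.  The
 * indexed form below additionally packs the element index at bit 2.
 */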
static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, false);
}

static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, false, true);
}

static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, false);
}

static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_FMLAL_zzxw(s, a, true, true);
}

TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)