qemu.git / target/arm/translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
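/* Worked example of the encoding above: for byte elements the tsz field
 * is 0b0001, so the combined tsz:imm3 value x ranges over [8, 15];
 * tszimm_shr(x) == 16 - x yields right shifts of 8 down to 1, and
 * tszimm_shl(x) == x - 8 yields left shifts of 0 through 7.
 */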
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
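/* E.g. expand_imm_sh8u(0x1ab) == 0xab00, while expand_imm_sh8s(0x0ff)
 * sign-extends to -1; only the SH bit selects the shift of 8.
 */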
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
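/* Equivalently, dtype == msz * 5: these are the dtype encodings whose
 * memory element size equals the register element size (LD1B, LD1H,
 * LD1W, LD1D).
 */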
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
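/* A predicate provides one bit per byte of vector data, so for element
 * size esz only every (1 << esz)-th bit is significant: all bits for
 * bytes, 0x5555... for halfwords, 0x1111... for words, and 0x0101...
 * for doublewords.
 */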
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)        \
{                                                                        \
    static gen_helper_gvec_4 * const fns[4] = {                          \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,  \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,  \
    };                                                                   \
    return do_zpzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)      \
{                                                              \
    static gen_helper_gvec_3 * const fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,  \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,  \
    };                                                         \
    return do_zpz_ool(s, a, fns[a->esz]);                      \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)      \
{                                                              \
    static gen_helper_gvec_reduc * const fns[4] = {            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,  \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,  \
    };                                                         \
    return do_vpz_ool(s, a, fns[a->esz]);                      \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)        \
{                                                                        \
    static gen_helper_gvec_4 * const fns[3] = {                          \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,  \
        gen_helper_sve_##name##_zpzw_s,                                  \
    };                                                                   \
    if (a->esz < 0 || a->esz >= 3) {                                     \
        return false;                                                    \
    }                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                               \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)        \
{                                                                      \
    static gen_helper_gvec_3 * const fns[4] = {                        \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,  \
        gen_helper_sve_##name##_zzw_s, NULL                            \
    };                                                                 \
    return do_zzw_ool(s, a, fns[a->esz]);                              \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)    \
{                                                              \
    static gen_helper_gvec_5 * const fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,  \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,  \
    };                                                         \
    return do_zpzzz_ool(s, a, fns[a->esz]);                    \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
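/* Example: with a 256-bit vector (fullsz == 32) and esz == 2 there are
 * 8 elements; POW2 yields 8, VL7 yields 7, MUL3 yields 6, and VL16
 * yields 0 because the bound exceeds the element count.
 */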
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
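/* The single movcond suffices because the widening extension above keeps
 * the exact result in range: e.g. after an unsigned add, any value above
 * UINT32_MAX must be an overflow, so comparing against the bound alone
 * detects it.
 */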
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
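/* In the signed cases above, the xor terms isolate the sign bits: t0 is
 * negative exactly when the operands' signs permit overflow and the
 * result's sign flipped, so the movcond on t0 < 0 selects the saturated
 * bound only when saturation is required.
 */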
/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
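/* EXT extracts a full vector of bytes from the concatenation Zm:Zn,
 * starting at byte imm of Zn; the fast path above realizes this as two
 * contiguous moves, the tail of Zn followed by the head of Zm.
 */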
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
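/* In the imm field of DUP_x, the lowest set bit selects the element size
 * and the bits above it give the index: e.g. imm == 0b100010 has
 * esz == 1 (halfwords) and index == 8.  An index beyond the vector
 * length duplicates zero instead.
 */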
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2148 *** SVE Permute - Predicates Group
2151 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2152 gen_helper_gvec_3 *fn)
2154 if (!sve_access_check(s)) {
2155 return true;
2158 unsigned vsz = pred_full_reg_size(s);
2160 /* Predicate sizes may be smaller and cannot use simd_desc.
2161 We cannot round up, as we do elsewhere, because we need
2162 the exact size for ZIP2 and REV. We retain the style for
2163 the other helpers for consistency. */
2164 TCGv_ptr t_d = tcg_temp_new_ptr();
2165 TCGv_ptr t_n = tcg_temp_new_ptr();
2166 TCGv_ptr t_m = tcg_temp_new_ptr();
2167 TCGv_i32 t_desc;
2168 int desc;
2170 desc = vsz - 2;
2171 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2172 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2174 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2175 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2176 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2177 t_desc = tcg_const_i32(desc);
2179 fn(t_d, t_n, t_m, t_desc);
2181 tcg_temp_free_ptr(t_d);
2182 tcg_temp_free_ptr(t_n);
2183 tcg_temp_free_ptr(t_m);
2184 tcg_temp_free_i32(t_desc);
2185 return true;
2188 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2189 gen_helper_gvec_2 *fn)
2191 if (!sve_access_check(s)) {
2192 return true;
2195 unsigned vsz = pred_full_reg_size(s);
2196 TCGv_ptr t_d = tcg_temp_new_ptr();
2197 TCGv_ptr t_n = tcg_temp_new_ptr();
2198 TCGv_i32 t_desc;
2199 int desc;
2201 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2202 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2204 /* Predicate sizes may be smaller and cannot use simd_desc.
2205 We cannot round up, as we do elsewhere, because we need
2206 the exact size for ZIP2 and REV. We retain the style for
2207 the other helpers for consistency. */
2209 desc = vsz - 2;
2210 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2211 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2212 t_desc = tcg_const_i32(desc);
2214 fn(t_d, t_n, t_desc);
2216 tcg_temp_free_i32(t_desc);
2217 tcg_temp_free_ptr(t_d);
2218 tcg_temp_free_ptr(t_n);
2219 return true;
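/*
 * A minimal sketch of the matching decode on the helper side, assuming it
 * mirrors sve_helper.c: the low bits carry the predicate size minus 2
 * (the minimum 2-byte predicate encodes as 0), with esz and high_odd
 * deposited at SIMD_DATA_SHIFT exactly as in the two helpers above.
 */
static inline void sketch_unpack_pred_desc(uint32_t desc, unsigned *psz,
                                           unsigned *esz, unsigned *high_odd)
{
    *psz = extract32(desc, 0, SIMD_DATA_SHIFT) + 2;       /* bytes in Preg */
    *esz = extract32(desc, SIMD_DATA_SHIFT, 2);           /* log2 element size */
    *high_odd = extract32(desc, SIMD_DATA_SHIFT + 2, 2);  /* ZIP1 vs ZIP2, etc. */
}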
2222 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2224 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2227 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2229 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2232 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2234 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2237 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2239 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2242 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2244 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2247 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2249 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2252 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2254 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2257 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2259 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2262 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2264 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2268 *** SVE Permute - Interleaving Group
2271 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2273 static gen_helper_gvec_3 * const fns[4] = {
2274 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2275 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278 if (sve_access_check(s)) {
2279 unsigned vsz = vec_full_reg_size(s);
2280 unsigned high_ofs = high ? vsz / 2 : 0;
2281 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2282 vec_full_reg_offset(s, a->rn) + high_ofs,
2283 vec_full_reg_offset(s, a->rm) + high_ofs,
2284 vsz, vsz, 0, fns[a->esz]);
2286 return true;
2289 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2290 gen_helper_gvec_3 *fn)
2292 if (sve_access_check(s)) {
2293 unsigned vsz = vec_full_reg_size(s);
2294 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2295 vec_full_reg_offset(s, a->rn),
2296 vec_full_reg_offset(s, a->rm),
2297 vsz, vsz, data, fn);
2299 return true;
2302 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2304 return do_zip(s, a, false);
2307 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2309 return do_zip(s, a, true);
2312 static gen_helper_gvec_3 * const uzp_fns[4] = {
2313 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2314 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2317 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2319 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2322 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2324 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2327 static gen_helper_gvec_3 * const trn_fns[4] = {
2328 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2329 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2332 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2334 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2337 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2339 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2343 *** SVE Permute Vector - Predicated Group
2346 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2348 static gen_helper_gvec_3 * const fns[4] = {
2349 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2351 return do_zpz_ool(s, a, fns[a->esz]);
2354 /* Call the helper that computes the ARM LastActiveElement pseudocode
2355 * function, scaled by the element size. This includes the not found
2356 * indication; e.g. not found for esz=3 is -8.
2358 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2360 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2361 * round up, as we do elsewhere, because we need the exact size.
2363 TCGv_ptr t_p = tcg_temp_new_ptr();
2364 TCGv_i32 t_desc;
2365 unsigned vsz = pred_full_reg_size(s);
2366 unsigned desc;
2368 desc = vsz - 2;
2369 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2371 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2372 t_desc = tcg_const_i32(desc);
2374 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2376 tcg_temp_free_i32(t_desc);
2377 tcg_temp_free_ptr(t_p);
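/*
 * A pure-C model of the helper's result, assuming the usual SVE layout of
 * one predicate bit per vector byte: the return value is the byte offset
 * of the last active element (element index scaled by 1 << esz), and not
 * found is -(1 << esz) -- e.g. -8 for esz == 3 -- so callers only need to
 * test for "< 0".
 */
static int model_last_active_element(const uint8_t *pg, unsigned vsz,
                                     unsigned esz)
{
    int elt = 1 << esz;
    int i;

    for (i = (int)vsz - elt; i >= 0; i -= elt) {
        if (pg[i >> 3] & (1u << (i & 7))) {   /* predicate bit for byte i */
            return i;
        }
    }
    return -elt;
}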
2380 /* Increment LAST to the offset of the next element in the vector,
2381 * wrapping around to 0.
2383 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2385 unsigned vsz = vec_full_reg_size(s);
2387 tcg_gen_addi_i32(last, last, 1 << esz);
2388 if (is_power_of_2(vsz)) {
2389 tcg_gen_andi_i32(last, last, vsz - 1);
2390 } else {
2391 TCGv_i32 max = tcg_const_i32(vsz);
2392 TCGv_i32 zero = tcg_const_i32(0);
2393 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2394 tcg_temp_free_i32(max);
2395 tcg_temp_free_i32(zero);
2399 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2400 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2402 unsigned vsz = vec_full_reg_size(s);
2404 if (is_power_of_2(vsz)) {
2405 tcg_gen_andi_i32(last, last, vsz - 1);
2406 } else {
2407 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2408 TCGv_i32 zero = tcg_const_i32(0);
2409 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2410 tcg_temp_free_i32(max);
2411 tcg_temp_free_i32(zero);
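/*
 * Scalar models of the two wrap strategies above, assuming only what the
 * generated code shows: a power-of-two vector size wraps with a single
 * AND, while other sizes need the compare-and-select that the movcond
 * provides.
 */
static unsigned model_incr_last(unsigned last, unsigned vsz, unsigned esz)
{
    last += 1u << esz;
    if ((vsz & (vsz - 1)) == 0) {
        return last & (vsz - 1);      /* power-of-two wrap to 0 */
    }
    return last >= vsz ? 0 : last;
}

static unsigned model_wrap_last(int last, unsigned vsz, unsigned esz)
{
    /* Not found (-(1 << esz)) wraps to the last element of the vector. */
    return last < 0 ? vsz - (1u << esz) : (unsigned)last;
}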
2415 /* Load an unsigned element of ESZ from BASE+OFS. */
2416 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2418 TCGv_i64 r = tcg_temp_new_i64();
2420 switch (esz) {
2421 case 0:
2422 tcg_gen_ld8u_i64(r, base, ofs);
2423 break;
2424 case 1:
2425 tcg_gen_ld16u_i64(r, base, ofs);
2426 break;
2427 case 2:
2428 tcg_gen_ld32u_i64(r, base, ofs);
2429 break;
2430 case 3:
2431 tcg_gen_ld_i64(r, base, ofs);
2432 break;
2433 default:
2434 g_assert_not_reached();
2436 return r;
2439 /* Load an unsigned element of ESZ from RM[LAST]. */
2440 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2441 int rm, int esz)
2443 TCGv_ptr p = tcg_temp_new_ptr();
2444 TCGv_i64 r;
2446 /* Convert the offset within the vector into an offset within ENV.
2447 * The final adjustment for the vector register base
2448 * is added via a constant offset to the load.
2450 #ifdef HOST_WORDS_BIGENDIAN
2451 /* Adjust for element ordering. See vec_reg_offset. */
2452 if (esz < 3) {
2453 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2455 #endif
2456 tcg_gen_ext_i32_ptr(p, last);
2457 tcg_gen_add_ptr(p, p, cpu_env);
2459 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2460 tcg_temp_free_ptr(p);
2462 return r;
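/*
 * A small model of the big-endian adjustment above, assuming the Zreg is
 * stored as host-order 64-bit lanes (see vec_reg_offset): a sub-doubleword
 * element at little-endian byte offset OFS lives at OFS ^ (8 - esize)
 * within its lane, and the XOR is a no-op for esz == 3.
 */
static unsigned model_be_elem_offset(unsigned ofs, unsigned esz)
{
    return esz < 3 ? ofs ^ (8 - (1u << esz)) : ofs;
}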
2465 /* Compute CLAST for a Zreg. */
2466 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2468 TCGv_i32 last;
2469 TCGLabel *over;
2470 TCGv_i64 ele;
2471 unsigned vsz, esz = a->esz;
2473 if (!sve_access_check(s)) {
2474 return true;
2477 last = tcg_temp_local_new_i32();
2478 over = gen_new_label();
2480 find_last_active(s, last, esz, a->pg);
2482 /* There is of course no movcond for a 2048-bit vector,
2483 * so we must branch over the actual store.
2485 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2487 if (!before) {
2488 incr_last_active(s, last, esz);
2491 ele = load_last_active(s, last, a->rm, esz);
2492 tcg_temp_free_i32(last);
2494 vsz = vec_full_reg_size(s);
2495 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2496 tcg_temp_free_i64(ele);
2498 /* If this insn used MOVPRFX, we may need a second move. */
2499 if (a->rd != a->rn) {
2500 TCGLabel *done = gen_new_label();
2501 tcg_gen_br(done);
2503 gen_set_label(over);
2504 do_mov_z(s, a->rd, a->rn);
2506 gen_set_label(done);
2507 } else {
2508 gen_set_label(over);
2510 return true;
2513 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2515 return do_clast_vector(s, a, false);
2518 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2520 return do_clast_vector(s, a, true);
2523 /* Compute CLAST for a scalar. */
2524 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2525 bool before, TCGv_i64 reg_val)
2527 TCGv_i32 last = tcg_temp_new_i32();
2528 TCGv_i64 ele, cmp, zero;
2530 find_last_active(s, last, esz, pg);
2532 /* Extend the original value of last prior to incrementing. */
2533 cmp = tcg_temp_new_i64();
2534 tcg_gen_ext_i32_i64(cmp, last);
2536 if (!before) {
2537 incr_last_active(s, last, esz);
2540 /* The conceit here is that while last < 0 indicates not found, after
2541 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2542 * from which we can load garbage. We then discard the garbage with
2543 * a conditional move.
2545 ele = load_last_active(s, last, rm, esz);
2546 tcg_temp_free_i32(last);
2548 zero = tcg_const_i64(0);
2549 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2551 tcg_temp_free_i64(zero);
2552 tcg_temp_free_i64(cmp);
2553 tcg_temp_free_i64(ele);
2556 /* Compute CLAST for a Vreg. */
2557 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2559 if (sve_access_check(s)) {
2560 int esz = a->esz;
2561 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2562 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2564 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2565 write_fp_dreg(s, a->rd, reg);
2566 tcg_temp_free_i64(reg);
2568 return true;
2571 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2573 return do_clast_fp(s, a, false);
2576 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2578 return do_clast_fp(s, a, true);
2581 /* Compute CLAST for a Xreg. */
2582 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2584 TCGv_i64 reg;
2586 if (!sve_access_check(s)) {
2587 return true;
2590 reg = cpu_reg(s, a->rd);
2591 switch (a->esz) {
2592 case 0:
2593 tcg_gen_ext8u_i64(reg, reg);
2594 break;
2595 case 1:
2596 tcg_gen_ext16u_i64(reg, reg);
2597 break;
2598 case 2:
2599 tcg_gen_ext32u_i64(reg, reg);
2600 break;
2601 case 3:
2602 break;
2603 default:
2604 g_assert_not_reached();
2607 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2608 return true;
2611 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2613 return do_clast_general(s, a, false);
2616 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2618 return do_clast_general(s, a, true);
2621 /* Compute LAST for a scalar. */
2622 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2623 int pg, int rm, bool before)
2625 TCGv_i32 last = tcg_temp_new_i32();
2626 TCGv_i64 ret;
2628 find_last_active(s, last, esz, pg);
2629 if (before) {
2630 wrap_last_active(s, last, esz);
2631 } else {
2632 incr_last_active(s, last, esz);
2635 ret = load_last_active(s, last, rm, esz);
2636 tcg_temp_free_i32(last);
2637 return ret;
2640 /* Compute LAST for a Vreg. */
2641 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2643 if (sve_access_check(s)) {
2644 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2645 write_fp_dreg(s, a->rd, val);
2646 tcg_temp_free_i64(val);
2648 return true;
2651 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2653 return do_last_fp(s, a, false);
2656 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2658 return do_last_fp(s, a, true);
2661 /* Compute LAST for a Xreg. */
2662 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2664 if (sve_access_check(s)) {
2665 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2666 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2667 tcg_temp_free_i64(val);
2669 return true;
2672 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2674 return do_last_general(s, a, false);
2677 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2679 return do_last_general(s, a, true);
2682 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2684 if (sve_access_check(s)) {
2685 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2687 return true;
2690 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2692 if (sve_access_check(s)) {
2693 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2694 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2695 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2696 tcg_temp_free_i64(t);
2698 return true;
2701 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2703 static gen_helper_gvec_3 * const fns[4] = {
2704 NULL,
2705 gen_helper_sve_revb_h,
2706 gen_helper_sve_revb_s,
2707 gen_helper_sve_revb_d,
2709 return do_zpz_ool(s, a, fns[a->esz]);
2712 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2714 static gen_helper_gvec_3 * const fns[4] = {
2715 NULL,
2716 NULL,
2717 gen_helper_sve_revh_s,
2718 gen_helper_sve_revh_d,
2720 return do_zpz_ool(s, a, fns[a->esz]);
2723 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2725 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2728 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2730 static gen_helper_gvec_3 * const fns[4] = {
2731 gen_helper_sve_rbit_b,
2732 gen_helper_sve_rbit_h,
2733 gen_helper_sve_rbit_s,
2734 gen_helper_sve_rbit_d,
2736 return do_zpz_ool(s, a, fns[a->esz]);
2739 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2741 if (sve_access_check(s)) {
2742 unsigned vsz = vec_full_reg_size(s);
2743 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2744 vec_full_reg_offset(s, a->rn),
2745 vec_full_reg_offset(s, a->rm),
2746 pred_full_reg_offset(s, a->pg),
2747 vsz, vsz, a->esz, gen_helper_sve_splice);
2749 return true;
2753 *** SVE Integer Compare - Vectors Group
2756 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2757 gen_helper_gvec_flags_4 *gen_fn)
2759 TCGv_ptr pd, zn, zm, pg;
2760 unsigned vsz;
2761 TCGv_i32 t;
2763 if (gen_fn == NULL) {
2764 return false;
2766 if (!sve_access_check(s)) {
2767 return true;
2770 vsz = vec_full_reg_size(s);
2771 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2772 pd = tcg_temp_new_ptr();
2773 zn = tcg_temp_new_ptr();
2774 zm = tcg_temp_new_ptr();
2775 pg = tcg_temp_new_ptr();
2777 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2778 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2779 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2780 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2782 gen_fn(t, pd, zn, zm, pg, t);
2784 tcg_temp_free_ptr(pd);
2785 tcg_temp_free_ptr(zn);
2786 tcg_temp_free_ptr(zm);
2787 tcg_temp_free_ptr(pg);
2789 do_pred_flags(t);
2791 tcg_temp_free_i32(t);
2792 return true;
2795 #define DO_PPZZ(NAME, name) \
2796 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2798 static gen_helper_gvec_flags_4 * const fns[4] = { \
2799 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2800 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2801 }; \
2802 return do_ppzz_flags(s, a, fns[a->esz]); \
2805 DO_PPZZ(CMPEQ, cmpeq)
2806 DO_PPZZ(CMPNE, cmpne)
2807 DO_PPZZ(CMPGT, cmpgt)
2808 DO_PPZZ(CMPGE, cmpge)
2809 DO_PPZZ(CMPHI, cmphi)
2810 DO_PPZZ(CMPHS, cmphs)
2812 #undef DO_PPZZ
2814 #define DO_PPZW(NAME, name) \
2815 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2817 static gen_helper_gvec_flags_4 * const fns[4] = { \
2818 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2819 gen_helper_sve_##name##_ppzw_s, NULL \
2820 }; \
2821 return do_ppzz_flags(s, a, fns[a->esz]); \
2824 DO_PPZW(CMPEQ, cmpeq)
2825 DO_PPZW(CMPNE, cmpne)
2826 DO_PPZW(CMPGT, cmpgt)
2827 DO_PPZW(CMPGE, cmpge)
2828 DO_PPZW(CMPHI, cmphi)
2829 DO_PPZW(CMPHS, cmphs)
2830 DO_PPZW(CMPLT, cmplt)
2831 DO_PPZW(CMPLE, cmple)
2832 DO_PPZW(CMPLO, cmplo)
2833 DO_PPZW(CMPLS, cmpls)
2835 #undef DO_PPZW
2838 *** SVE Integer Compare - Immediate Groups
2841 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2842 gen_helper_gvec_flags_3 *gen_fn)
2844 TCGv_ptr pd, zn, pg;
2845 unsigned vsz;
2846 TCGv_i32 t;
2848 if (gen_fn == NULL) {
2849 return false;
2851 if (!sve_access_check(s)) {
2852 return true;
2855 vsz = vec_full_reg_size(s);
2856 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2857 pd = tcg_temp_new_ptr();
2858 zn = tcg_temp_new_ptr();
2859 pg = tcg_temp_new_ptr();
2861 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2862 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2863 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2865 gen_fn(t, pd, zn, pg, t);
2867 tcg_temp_free_ptr(pd);
2868 tcg_temp_free_ptr(zn);
2869 tcg_temp_free_ptr(pg);
2871 do_pred_flags(t);
2873 tcg_temp_free_i32(t);
2874 return true;
2877 #define DO_PPZI(NAME, name) \
2878 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2880 static gen_helper_gvec_flags_3 * const fns[4] = { \
2881 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2882 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2883 }; \
2884 return do_ppzi_flags(s, a, fns[a->esz]); \
2887 DO_PPZI(CMPEQ, cmpeq)
2888 DO_PPZI(CMPNE, cmpne)
2889 DO_PPZI(CMPGT, cmpgt)
2890 DO_PPZI(CMPGE, cmpge)
2891 DO_PPZI(CMPHI, cmphi)
2892 DO_PPZI(CMPHS, cmphs)
2893 DO_PPZI(CMPLT, cmplt)
2894 DO_PPZI(CMPLE, cmple)
2895 DO_PPZI(CMPLO, cmplo)
2896 DO_PPZI(CMPLS, cmpls)
2898 #undef DO_PPZI
2901 *** SVE Partition Break Group
2904 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2905 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2907 if (!sve_access_check(s)) {
2908 return true;
2911 unsigned vsz = pred_full_reg_size(s);
2913 /* Predicate sizes may be smaller and cannot use simd_desc. */
2914 TCGv_ptr d = tcg_temp_new_ptr();
2915 TCGv_ptr n = tcg_temp_new_ptr();
2916 TCGv_ptr m = tcg_temp_new_ptr();
2917 TCGv_ptr g = tcg_temp_new_ptr();
2918 TCGv_i32 t = tcg_const_i32(vsz - 2);
2920 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2921 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2922 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2923 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2925 if (a->s) {
2926 fn_s(t, d, n, m, g, t);
2927 do_pred_flags(t);
2928 } else {
2929 fn(d, n, m, g, t);
2931 tcg_temp_free_ptr(d);
2932 tcg_temp_free_ptr(n);
2933 tcg_temp_free_ptr(m);
2934 tcg_temp_free_ptr(g);
2935 tcg_temp_free_i32(t);
2936 return true;
2939 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2940 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2942 if (!sve_access_check(s)) {
2943 return true;
2946 unsigned vsz = pred_full_reg_size(s);
2948 /* Predicate sizes may be smaller and cannot use simd_desc. */
2949 TCGv_ptr d = tcg_temp_new_ptr();
2950 TCGv_ptr n = tcg_temp_new_ptr();
2951 TCGv_ptr g = tcg_temp_new_ptr();
2952 TCGv_i32 t = tcg_const_i32(vsz - 2);
2954 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2955 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2956 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2958 if (a->s) {
2959 fn_s(t, d, n, g, t);
2960 do_pred_flags(t);
2961 } else {
2962 fn(d, n, g, t);
2964 tcg_temp_free_ptr(d);
2965 tcg_temp_free_ptr(n);
2966 tcg_temp_free_ptr(g);
2967 tcg_temp_free_i32(t);
2968 return true;
2971 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2973 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2978 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2983 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2988 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2993 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2998 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3001 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3003 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3007 *** SVE Predicate Count Group
3010 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3012 unsigned psz = pred_full_reg_size(s);
3014 if (psz <= 8) {
3015 uint64_t psz_mask;
3017 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3018 if (pn != pg) {
3019 TCGv_i64 g = tcg_temp_new_i64();
3020 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3021 tcg_gen_and_i64(val, val, g);
3022 tcg_temp_free_i64(g);
3025 /* Reduce the pred_esz_masks value simply to reduce the
3026 * size of the code generated here.
3028 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3029 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3031 tcg_gen_ctpop_i64(val, val);
3032 } else {
3033 TCGv_ptr t_pn = tcg_temp_new_ptr();
3034 TCGv_ptr t_pg = tcg_temp_new_ptr();
3035 unsigned desc;
3036 TCGv_i32 t_desc;
3038 desc = psz - 2;
3039 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3041 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3042 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3043 t_desc = tcg_const_i32(desc);
3045 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3046 tcg_temp_free_ptr(t_pn);
3047 tcg_temp_free_ptr(t_pg);
3048 tcg_temp_free_i32(t_desc);
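/*
 * A sketch of the psz <= 8 fast path above, assuming the mask table
 * matches pred_esz_masks (defined elsewhere in the translator): with one
 * predicate bit per vector byte, counting elements of size 1 << esz means
 * keeping every (1 << esz)-th bit before the popcount.
 */
static uint64_t model_cntp_small(uint64_t pn, uint64_t pg, unsigned esz,
                                 unsigned psz_bytes)
{
    static const uint64_t esz_mask[4] = {
        0xffffffffffffffffull,   /* B: every bit */
        0x5555555555555555ull,   /* H: every 2nd bit */
        0x1111111111111111ull,   /* S: every 4th bit */
        0x0101010101010101ull,   /* D: every 8th bit */
    };
    uint64_t psz_mask = psz_bytes >= 8 ? ~0ull
                                       : (1ull << (psz_bytes * 8)) - 1;

    return (uint64_t)__builtin_popcountll(pn & pg & esz_mask[esz] & psz_mask);
}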
3052 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3054 if (sve_access_check(s)) {
3055 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3057 return true;
3060 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3062 if (sve_access_check(s)) {
3063 TCGv_i64 reg = cpu_reg(s, a->rd);
3064 TCGv_i64 val = tcg_temp_new_i64();
3066 do_cntp(s, val, a->esz, a->pg, a->pg);
3067 if (a->d) {
3068 tcg_gen_sub_i64(reg, reg, val);
3069 } else {
3070 tcg_gen_add_i64(reg, reg, val);
3072 tcg_temp_free_i64(val);
3074 return true;
3077 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3079 if (a->esz == 0) {
3080 return false;
3082 if (sve_access_check(s)) {
3083 unsigned vsz = vec_full_reg_size(s);
3084 TCGv_i64 val = tcg_temp_new_i64();
3085 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3087 do_cntp(s, val, a->esz, a->pg, a->pg);
3088 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3089 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3091 return true;
3094 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3096 if (sve_access_check(s)) {
3097 TCGv_i64 reg = cpu_reg(s, a->rd);
3098 TCGv_i64 val = tcg_temp_new_i64();
3100 do_cntp(s, val, a->esz, a->pg, a->pg);
3101 do_sat_addsub_32(reg, val, a->u, a->d);
3103 return true;
3106 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3108 if (sve_access_check(s)) {
3109 TCGv_i64 reg = cpu_reg(s, a->rd);
3110 TCGv_i64 val = tcg_temp_new_i64();
3112 do_cntp(s, val, a->esz, a->pg, a->pg);
3113 do_sat_addsub_64(reg, val, a->u, a->d);
3115 return true;
3118 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3120 if (a->esz == 0) {
3121 return false;
3123 if (sve_access_check(s)) {
3124 TCGv_i64 val = tcg_temp_new_i64();
3125 do_cntp(s, val, a->esz, a->pg, a->pg);
3126 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3128 return true;
3132 *** SVE Integer Compare Scalars Group
3135 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3137 if (!sve_access_check(s)) {
3138 return true;
3141 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3142 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3143 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3144 TCGv_i64 cmp = tcg_temp_new_i64();
3146 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3147 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3148 tcg_temp_free_i64(cmp);
3150 /* VF = !NF & !CF. */
3151 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3152 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3154 /* Both NF and VF actually look at bit 31. */
3155 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3156 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3157 return true;
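/*
 * A one-line model of the final negations, relying only on the comment
 * above: QEMU keeps each flag in bit 31 of cpu_NF/cpu_VF, and negating
 * the 0-or-1 setcond result yields 0 or 0xffffffff, which sets bit 31
 * exactly when the flag is true.
 */
static uint32_t model_flag_bit31(uint32_t zero_or_one)
{
    return -zero_or_one;   /* 0 -> 0x00000000, 1 -> 0xffffffff */
}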
3160 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3162 TCGv_i64 op0, op1, t0, t1, tmax;
3163 TCGv_i32 t2, t3;
3164 TCGv_ptr ptr;
3165 unsigned desc, vsz = vec_full_reg_size(s);
3166 TCGCond cond;
3168 if (!sve_access_check(s)) {
3169 return true;
3172 op0 = read_cpu_reg(s, a->rn, 1);
3173 op1 = read_cpu_reg(s, a->rm, 1);
3175 if (!a->sf) {
3176 if (a->u) {
3177 tcg_gen_ext32u_i64(op0, op0);
3178 tcg_gen_ext32u_i64(op1, op1);
3179 } else {
3180 tcg_gen_ext32s_i64(op0, op0);
3181 tcg_gen_ext32s_i64(op1, op1);
3185 /* For the helper, compress the different conditions into a computation
3186 * of the number of iterations for which the condition is true.
3188 t0 = tcg_temp_new_i64();
3189 t1 = tcg_temp_new_i64();
3190 tcg_gen_sub_i64(t0, op1, op0);
3192 tmax = tcg_const_i64(vsz >> a->esz);
3193 if (a->eq) {
3194 /* Equality means one more iteration. */
3195 tcg_gen_addi_i64(t0, t0, 1);
3197 /* If op1 is the maximum (un)signed integer (which is the only time the
3198 * addition above could overflow), then we produce an all-true predicate
3199 * by setting the count to the vector length. This is because the
3200 * pseudocode is described as an increment + compare loop, and the
3201 * maximum integer would always compare true.
3203 tcg_gen_movi_i64(t1, (a->sf
3204 ? (a->u ? UINT64_MAX : INT64_MAX)
3205 : (a->u ? UINT32_MAX : INT32_MAX)));
3206 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3209 /* Bound to the maximum. */
3210 tcg_gen_umin_i64(t0, t0, tmax);
3211 tcg_temp_free_i64(tmax);
3213 /* Set the count to zero if the condition is false. */
3214 cond = (a->u
3215 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3216 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3217 tcg_gen_movi_i64(t1, 0);
3218 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3219 tcg_temp_free_i64(t1);
3221 /* Since we're bounded, pass as a 32-bit type. */
3222 t2 = tcg_temp_new_i32();
3223 tcg_gen_extrl_i64_i32(t2, t0);
3224 tcg_temp_free_i64(t0);
3226 /* Scale elements to bits. */
3227 tcg_gen_shli_i32(t2, t2, a->esz);
3229 desc = (vsz / 8) - 2;
3230 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3231 t3 = tcg_const_i32(desc);
3233 ptr = tcg_temp_new_ptr();
3234 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3236 gen_helper_sve_while(t2, ptr, t2, t3);
3237 do_pred_flags(t2);
3239 tcg_temp_free_ptr(ptr);
3240 tcg_temp_free_i32(t2);
3241 tcg_temp_free_i32(t3);
3242 return true;
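/*
 * A scalar model of the element count computed above, shown for the
 * unsigned 64-bit case (WHILELO when !eq, WHILELS when eq); the generated
 * code obtains the same result with movcond and umin.
 */
static uint64_t model_while_count(uint64_t op0, uint64_t op1, bool eq,
                                  uint64_t max_elts)
{
    uint64_t n;

    if (eq) {                                /* while (op0 <= op1) */
        if (op0 > op1) {
            return 0;
        }
        n = op1 - op0 + 1;
        if (op1 == UINT64_MAX) {
            n = max_elts;                    /* the +1 above overflowed */
        }
    } else {                                 /* while (op0 < op1) */
        if (op0 >= op1) {
            return 0;
        }
        n = op1 - op0;
    }
    return n < max_elts ? n : max_elts;      /* bound to the vector */
}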
3246 *** SVE Integer Wide Immediate - Unpredicated Group
3249 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3251 if (a->esz == 0) {
3252 return false;
3254 if (sve_access_check(s)) {
3255 unsigned vsz = vec_full_reg_size(s);
3256 int dofs = vec_full_reg_offset(s, a->rd);
3257 uint64_t imm;
3259 /* Decode the VFP immediate. */
3260 imm = vfp_expand_imm(a->esz, a->imm);
3261 imm = dup_const(a->esz, imm);
3263 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3265 return true;
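/*
 * A worked example plus a model of the replication step.  With
 * esz == MO_16 and imm8 == 0x70, vfp_expand_imm yields 0x3c00
 * (half-precision 1.0), and dup_const replicates it to
 * 0x3c003c003c003c00 for the 64-bit dup.  The model below assumes
 * dup_const's usual multiply-by-ones implementation.
 */
static uint64_t model_dup_const(unsigned esz, uint64_t x)
{
    switch (esz) {
    case 0:
        return (uint8_t)x * 0x0101010101010101ull;
    case 1:
        return (uint16_t)x * 0x0001000100010001ull;
    case 2:
        return (uint32_t)x * 0x0000000100000001ull;
    default:
        return x;
    }
}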
3268 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3270 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3271 return false;
3273 if (sve_access_check(s)) {
3274 unsigned vsz = vec_full_reg_size(s);
3275 int dofs = vec_full_reg_offset(s, a->rd);
3277 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3279 return true;
3282 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3284 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3285 return false;
3287 if (sve_access_check(s)) {
3288 unsigned vsz = vec_full_reg_size(s);
3289 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3290 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3292 return true;
3295 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3297 a->imm = -a->imm;
3298 return trans_ADD_zzi(s, a);
3301 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3303 static const GVecGen2s op[4] = {
3304 { .fni8 = tcg_gen_vec_sub8_i64,
3305 .fniv = tcg_gen_sub_vec,
3306 .fno = gen_helper_sve_subri_b,
3307 .opc = INDEX_op_sub_vec,
3308 .vece = MO_8,
3309 .scalar_first = true },
3310 { .fni8 = tcg_gen_vec_sub16_i64,
3311 .fniv = tcg_gen_sub_vec,
3312 .fno = gen_helper_sve_subri_h,
3313 .opc = INDEX_op_sub_vec,
3314 .vece = MO_16,
3315 .scalar_first = true },
3316 { .fni4 = tcg_gen_sub_i32,
3317 .fniv = tcg_gen_sub_vec,
3318 .fno = gen_helper_sve_subri_s,
3319 .opc = INDEX_op_sub_vec,
3320 .vece = MO_32,
3321 .scalar_first = true },
3322 { .fni8 = tcg_gen_sub_i64,
3323 .fniv = tcg_gen_sub_vec,
3324 .fno = gen_helper_sve_subri_d,
3325 .opc = INDEX_op_sub_vec,
3326 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3327 .vece = MO_64,
3328 .scalar_first = true }
3331 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3332 return false;
3334 if (sve_access_check(s)) {
3335 unsigned vsz = vec_full_reg_size(s);
3336 TCGv_i64 c = tcg_const_i64(a->imm);
3337 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3338 vec_full_reg_offset(s, a->rn),
3339 vsz, vsz, c, &op[a->esz]);
3340 tcg_temp_free_i64(c);
3342 return true;
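/*
 * Note on the table above: .scalar_first marks the scalar as the first
 * operand of the subtraction (see tcg-op-gvec.h), so each element
 * computes imm - Zn[i] -- the reversed subtract SUBR requires -- rather
 * than Zn[i] - imm as in trans_ADD_zzi/trans_SUB_zzi.  Per element:
 */
static int32_t model_subri_s(int32_t imm, int32_t n)
{
    return imm - n;
}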
3345 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3347 if (sve_access_check(s)) {
3348 unsigned vsz = vec_full_reg_size(s);
3349 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3350 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3352 return true;
3355 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3357 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3358 return false;
3360 if (sve_access_check(s)) {
3361 TCGv_i64 val = tcg_const_i64(a->imm);
3362 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3363 tcg_temp_free_i64(val);
3365 return true;
3368 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3370 return do_zzi_sat(s, a, false, false);
3373 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3375 return do_zzi_sat(s, a, true, false);
3378 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3380 return do_zzi_sat(s, a, false, true);
3383 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3385 return do_zzi_sat(s, a, true, true);
3388 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3390 if (sve_access_check(s)) {
3391 unsigned vsz = vec_full_reg_size(s);
3392 TCGv_i64 c = tcg_const_i64(a->imm);
3394 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3395 vec_full_reg_offset(s, a->rn),
3396 c, vsz, vsz, 0, fn);
3397 tcg_temp_free_i64(c);
3399 return true;
3402 #define DO_ZZI(NAME, name) \
3403 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a) \
3405 static gen_helper_gvec_2i * const fns[4] = { \
3406 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3407 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3408 }; \
3409 return do_zzi_ool(s, a, fns[a->esz]); \
3412 DO_ZZI(SMAX, smax)
3413 DO_ZZI(UMAX, umax)
3414 DO_ZZI(SMIN, smin)
3415 DO_ZZI(UMIN, umin)
3417 #undef DO_ZZI
3419 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3421 static gen_helper_gvec_3 * const fns[2][2] = {
3422 { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3423 { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3426 if (sve_access_check(s)) {
3427 unsigned vsz = vec_full_reg_size(s);
3428 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3429 vec_full_reg_offset(s, a->rn),
3430 vec_full_reg_offset(s, a->rm),
3431 vsz, vsz, 0, fns[a->u][a->sz]);
3433 return true;
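/*
 * A per-lane sketch based on the architectural definition of SDOT (the
 * actual helpers live outside this file): each 32-bit destination lane
 * accumulates four adjacent byte products.
 */
static int32_t model_sdot_b_lane(const int8_t n[4], const int8_t m[4],
                                 int32_t d)
{
    int i;

    for (i = 0; i < 4; i++) {
        d += n[i] * m[i];
    }
    return d;
}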
3436 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3438 static gen_helper_gvec_3 * const fns[2][2] = {
3439 { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3440 { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3443 if (sve_access_check(s)) {
3444 unsigned vsz = vec_full_reg_size(s);
3445 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3446 vec_full_reg_offset(s, a->rn),
3447 vec_full_reg_offset(s, a->rm),
3448 vsz, vsz, a->index, fns[a->u][a->sz]);
3450 return true;
3455 *** SVE Floating Point Multiply-Add Indexed Group
3458 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3460 static gen_helper_gvec_4_ptr * const fns[3] = {
3461 gen_helper_gvec_fmla_idx_h,
3462 gen_helper_gvec_fmla_idx_s,
3463 gen_helper_gvec_fmla_idx_d,
3466 if (sve_access_check(s)) {
3467 unsigned vsz = vec_full_reg_size(s);
3468 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3469 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3470 vec_full_reg_offset(s, a->rn),
3471 vec_full_reg_offset(s, a->rm),
3472 vec_full_reg_offset(s, a->ra),
3473 status, vsz, vsz, (a->index << 1) | a->sub,
3474 fns[a->esz - 1]);
3475 tcg_temp_free_ptr(status);
3477 return true;
3481 *** SVE Floating Point Multiply Indexed Group
3484 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3486 static gen_helper_gvec_3_ptr * const fns[3] = {
3487 gen_helper_gvec_fmul_idx_h,
3488 gen_helper_gvec_fmul_idx_s,
3489 gen_helper_gvec_fmul_idx_d,
3492 if (sve_access_check(s)) {
3493 unsigned vsz = vec_full_reg_size(s);
3494 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3495 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3496 vec_full_reg_offset(s, a->rn),
3497 vec_full_reg_offset(s, a->rm),
3498 status, vsz, vsz, a->index, fns[a->esz - 1]);
3499 tcg_temp_free_ptr(status);
3501 return true;
3505 *** SVE Floating Point Fast Reduction Group
3508 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3509 TCGv_ptr, TCGv_i32);
3511 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3512 gen_helper_fp_reduce *fn)
3514 unsigned vsz = vec_full_reg_size(s);
3515 unsigned p2vsz = pow2ceil(vsz);
3516 TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3517 TCGv_ptr t_zn, t_pg, status;
3518 TCGv_i64 temp;
3520 temp = tcg_temp_new_i64();
3521 t_zn = tcg_temp_new_ptr();
3522 t_pg = tcg_temp_new_ptr();
3524 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3525 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3526 status = get_fpstatus_ptr(a->esz == MO_16);
3528 fn(temp, t_zn, t_pg, status, t_desc);
3529 tcg_temp_free_ptr(t_zn);
3530 tcg_temp_free_ptr(t_pg);
3531 tcg_temp_free_ptr(status);
3532 tcg_temp_free_i32(t_desc);
3534 write_fp_dreg(s, a->rd, temp);
3535 tcg_temp_free_i64(temp);
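/*
 * A sketch of why maxsz is rounded up with pow2ceil above: the assumption
 * is that the reduction helper walks a power-of-two tree and pads the
 * missing tail elements with the identity (zero for FADDV).  Conceptually,
 * called as model_tree_reduce(v, 0, p2elts, elts):
 */
static float model_tree_reduce(const float *v, unsigned i, unsigned span,
                               unsigned elts)
{
    if (span == 1) {
        return i < elts ? v[i] : 0.0f;   /* pad missing elements */
    }
    return model_tree_reduce(v, i, span / 2, elts)
         + model_tree_reduce(v, i + span / 2, span / 2, elts);
}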
3538 #define DO_VPZ(NAME, name) \
3539 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3541 static gen_helper_fp_reduce * const fns[3] = { \
3542 gen_helper_sve_##name##_h, \
3543 gen_helper_sve_##name##_s, \
3544 gen_helper_sve_##name##_d, \
3545 }; \
3546 if (a->esz == 0) { \
3547 return false; \
3549 if (sve_access_check(s)) { \
3550 do_reduce(s, a, fns[a->esz - 1]); \
3552 return true; \
3555 DO_VPZ(FADDV, faddv)
3556 DO_VPZ(FMINNMV, fminnmv)
3557 DO_VPZ(FMAXNMV, fmaxnmv)
3558 DO_VPZ(FMINV, fminv)
3559 DO_VPZ(FMAXV, fmaxv)
3562 *** SVE Floating Point Unary Operations - Unpredicated Group
3565 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3567 unsigned vsz = vec_full_reg_size(s);
3568 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3570 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3571 vec_full_reg_offset(s, a->rn),
3572 status, vsz, vsz, 0, fn);
3573 tcg_temp_free_ptr(status);
3576 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3578 static gen_helper_gvec_2_ptr * const fns[3] = {
3579 gen_helper_gvec_frecpe_h,
3580 gen_helper_gvec_frecpe_s,
3581 gen_helper_gvec_frecpe_d,
3583 if (a->esz == 0) {
3584 return false;
3586 if (sve_access_check(s)) {
3587 do_zz_fp(s, a, fns[a->esz - 1]);
3589 return true;
3592 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3594 static gen_helper_gvec_2_ptr * const fns[3] = {
3595 gen_helper_gvec_frsqrte_h,
3596 gen_helper_gvec_frsqrte_s,
3597 gen_helper_gvec_frsqrte_d,
3599 if (a->esz == 0) {
3600 return false;
3602 if (sve_access_check(s)) {
3603 do_zz_fp(s, a, fns[a->esz - 1]);
3605 return true;
3609 *** SVE Floating Point Compare with Zero Group
3612 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3613 gen_helper_gvec_3_ptr *fn)
3615 unsigned vsz = vec_full_reg_size(s);
3616 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3618 tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3619 vec_full_reg_offset(s, a->rn),
3620 pred_full_reg_offset(s, a->pg),
3621 status, vsz, vsz, 0, fn);
3622 tcg_temp_free_ptr(status);
3625 #define DO_PPZ(NAME, name) \
3626 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
3628 static gen_helper_gvec_3_ptr * const fns[3] = { \
3629 gen_helper_sve_##name##_h, \
3630 gen_helper_sve_##name##_s, \
3631 gen_helper_sve_##name##_d, \
3632 }; \
3633 if (a->esz == 0) { \
3634 return false; \
3636 if (sve_access_check(s)) { \
3637 do_ppz_fp(s, a, fns[a->esz - 1]); \
3639 return true; \
3642 DO_PPZ(FCMGE_ppz0, fcmge0)
3643 DO_PPZ(FCMGT_ppz0, fcmgt0)
3644 DO_PPZ(FCMLE_ppz0, fcmle0)
3645 DO_PPZ(FCMLT_ppz0, fcmlt0)
3646 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3647 DO_PPZ(FCMNE_ppz0, fcmne0)
3649 #undef DO_PPZ
3652 *** SVE floating-point trig multiply-add coefficient
3655 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3657 static gen_helper_gvec_3_ptr * const fns[3] = {
3658 gen_helper_sve_ftmad_h,
3659 gen_helper_sve_ftmad_s,
3660 gen_helper_sve_ftmad_d,
3663 if (a->esz == 0) {
3664 return false;
3666 if (sve_access_check(s)) {
3667 unsigned vsz = vec_full_reg_size(s);
3668 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3669 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3670 vec_full_reg_offset(s, a->rn),
3671 vec_full_reg_offset(s, a->rm),
3672 status, vsz, vsz, a->imm, fns[a->esz - 1]);
3673 tcg_temp_free_ptr(status);
3675 return true;
3679 *** SVE Floating Point Accumulating Reduction Group
3682 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3684 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3685 TCGv_ptr, TCGv_ptr, TCGv_i32);
3686 static fadda_fn * const fns[3] = {
3687 gen_helper_sve_fadda_h,
3688 gen_helper_sve_fadda_s,
3689 gen_helper_sve_fadda_d,
3691 unsigned vsz = vec_full_reg_size(s);
3692 TCGv_ptr t_rm, t_pg, t_fpst;
3693 TCGv_i64 t_val;
3694 TCGv_i32 t_desc;
3696 if (a->esz == 0) {
3697 return false;
3699 if (!sve_access_check(s)) {
3700 return true;
3703 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3704 t_rm = tcg_temp_new_ptr();
3705 t_pg = tcg_temp_new_ptr();
3706 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3707 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3708 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3709 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3711 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3713 tcg_temp_free_i32(t_desc);
3714 tcg_temp_free_ptr(t_fpst);
3715 tcg_temp_free_ptr(t_pg);
3716 tcg_temp_free_ptr(t_rm);
3718 write_fp_dreg(s, a->rd, t_val);
3719 tcg_temp_free_i64(t_val);
3720 return true;
3724 *** SVE Floating Point Arithmetic - Unpredicated Group
3727 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3728 gen_helper_gvec_3_ptr *fn)
3730 if (fn == NULL) {
3731 return false;
3733 if (sve_access_check(s)) {
3734 unsigned vsz = vec_full_reg_size(s);
3735 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3736 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3737 vec_full_reg_offset(s, a->rn),
3738 vec_full_reg_offset(s, a->rm),
3739 status, vsz, vsz, 0, fn);
3740 tcg_temp_free_ptr(status);
3742 return true;
3746 #define DO_FP3(NAME, name) \
3747 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
3749 static gen_helper_gvec_3_ptr * const fns[4] = { \
3750 NULL, gen_helper_gvec_##name##_h, \
3751 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3752 }; \
3753 return do_zzz_fp(s, a, fns[a->esz]); \
3756 DO_FP3(FADD_zzz, fadd)
3757 DO_FP3(FSUB_zzz, fsub)
3758 DO_FP3(FMUL_zzz, fmul)
3759 DO_FP3(FTSMUL, ftsmul)
3760 DO_FP3(FRECPS, recps)
3761 DO_FP3(FRSQRTS, rsqrts)
3763 #undef DO_FP3
3766 *** SVE Floating Point Arithmetic - Predicated Group
3769 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3770 gen_helper_gvec_4_ptr *fn)
3772 if (fn == NULL) {
3773 return false;
3775 if (sve_access_check(s)) {
3776 unsigned vsz = vec_full_reg_size(s);
3777 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3778 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3779 vec_full_reg_offset(s, a->rn),
3780 vec_full_reg_offset(s, a->rm),
3781 pred_full_reg_offset(s, a->pg),
3782 status, vsz, vsz, 0, fn);
3783 tcg_temp_free_ptr(status);
3785 return true;
3788 #define DO_FP3(NAME, name) \
3789 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
3791 static gen_helper_gvec_4_ptr * const fns[4] = { \
3792 NULL, gen_helper_sve_##name##_h, \
3793 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3794 }; \
3795 return do_zpzz_fp(s, a, fns[a->esz]); \
3798 DO_FP3(FADD_zpzz, fadd)
3799 DO_FP3(FSUB_zpzz, fsub)
3800 DO_FP3(FMUL_zpzz, fmul)
3801 DO_FP3(FMIN_zpzz, fmin)
3802 DO_FP3(FMAX_zpzz, fmax)
3803 DO_FP3(FMINNM_zpzz, fminnum)
3804 DO_FP3(FMAXNM_zpzz, fmaxnum)
3805 DO_FP3(FABD, fabd)
3806 DO_FP3(FSCALE, fscalbn)
3807 DO_FP3(FDIV, fdiv)
3808 DO_FP3(FMULX, fmulx)
3810 #undef DO_FP3
3812 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3813 TCGv_i64, TCGv_ptr, TCGv_i32);
3815 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3816 TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3818 unsigned vsz = vec_full_reg_size(s);
3819 TCGv_ptr t_zd, t_zn, t_pg, status;
3820 TCGv_i32 desc;
3822 t_zd = tcg_temp_new_ptr();
3823 t_zn = tcg_temp_new_ptr();
3824 t_pg = tcg_temp_new_ptr();
3825 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3826 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3827 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3829 status = get_fpstatus_ptr(is_fp16);
3830 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3831 fn(t_zd, t_zn, t_pg, scalar, status, desc);
3833 tcg_temp_free_i32(desc);
3834 tcg_temp_free_ptr(status);
3835 tcg_temp_free_ptr(t_pg);
3836 tcg_temp_free_ptr(t_zn);
3837 tcg_temp_free_ptr(t_zd);
3840 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3841 gen_helper_sve_fp2scalar *fn)
3843 TCGv_i64 temp = tcg_const_i64(imm);
3844 do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3845 tcg_temp_free_i64(temp);
3848 #define DO_FP_IMM(NAME, name, const0, const1) \
3849 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \
3851 static gen_helper_sve_fp2scalar * const fns[3] = { \
3852 gen_helper_sve_##name##_h, \
3853 gen_helper_sve_##name##_s, \
3854 gen_helper_sve_##name##_d \
3855 }; \
3856 static uint64_t const val[3][2] = { \
3857 { float16_##const0, float16_##const1 }, \
3858 { float32_##const0, float32_##const1 }, \
3859 { float64_##const0, float64_##const1 }, \
3860 }; \
3861 if (a->esz == 0) { \
3862 return false; \
3864 if (sve_access_check(s)) { \
3865 do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3867 return true; \
3870 #define float16_two make_float16(0x4000)
3871 #define float32_two make_float32(0x40000000)
3872 #define float64_two make_float64(0x4000000000000000ULL)
3874 DO_FP_IMM(FADD, fadds, half, one)
3875 DO_FP_IMM(FSUB, fsubs, half, one)
3876 DO_FP_IMM(FMUL, fmuls, half, two)
3877 DO_FP_IMM(FSUBR, fsubrs, half, one)
3878 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3879 DO_FP_IMM(FMINNM, fminnms, zero, one)
3880 DO_FP_IMM(FMAX, fmaxs, zero, one)
3881 DO_FP_IMM(FMIN, fmins, zero, one)
3883 #undef DO_FP_IMM
3885 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3886 gen_helper_gvec_4_ptr *fn)
3888 if (fn == NULL) {
3889 return false;
3891 if (sve_access_check(s)) {
3892 unsigned vsz = vec_full_reg_size(s);
3893 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3894 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3895 vec_full_reg_offset(s, a->rn),
3896 vec_full_reg_offset(s, a->rm),
3897 pred_full_reg_offset(s, a->pg),
3898 status, vsz, vsz, 0, fn);
3899 tcg_temp_free_ptr(status);
3901 return true;
3904 #define DO_FPCMP(NAME, name) \
3905 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
3907 static gen_helper_gvec_4_ptr * const fns[4] = { \
3908 NULL, gen_helper_sve_##name##_h, \
3909 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3910 }; \
3911 return do_fp_cmp(s, a, fns[a->esz]); \
3914 DO_FPCMP(FCMGE, fcmge)
3915 DO_FPCMP(FCMGT, fcmgt)
3916 DO_FPCMP(FCMEQ, fcmeq)
3917 DO_FPCMP(FCMNE, fcmne)
3918 DO_FPCMP(FCMUO, fcmuo)
3919 DO_FPCMP(FACGE, facge)
3920 DO_FPCMP(FACGT, facgt)
3922 #undef DO_FPCMP
3924 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3926 static gen_helper_gvec_4_ptr * const fns[3] = {
3927 gen_helper_sve_fcadd_h,
3928 gen_helper_sve_fcadd_s,
3929 gen_helper_sve_fcadd_d
3932 if (a->esz == 0) {
3933 return false;
3935 if (sve_access_check(s)) {
3936 unsigned vsz = vec_full_reg_size(s);
3937 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3938 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3939 vec_full_reg_offset(s, a->rn),
3940 vec_full_reg_offset(s, a->rm),
3941 pred_full_reg_offset(s, a->pg),
3942 status, vsz, vsz, a->rot, fns[a->esz - 1]);
3943 tcg_temp_free_ptr(status);
3945 return true;
3948 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3950 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3952 if (fn == NULL) {
3953 return false;
3955 if (!sve_access_check(s)) {
3956 return true;
3959 unsigned vsz = vec_full_reg_size(s);
3960 unsigned desc;
3961 TCGv_i32 t_desc;
3962 TCGv_ptr pg = tcg_temp_new_ptr();
3964 /* We would need 7 operands to pass these arguments "properly".
3965 * So we encode all the register numbers into the descriptor.
3967 desc = deposit32(a->rd, 5, 5, a->rn);
3968 desc = deposit32(desc, 10, 5, a->rm);
3969 desc = deposit32(desc, 15, 5, a->ra);
3970 desc = simd_desc(vsz, vsz, desc);
3972 t_desc = tcg_const_i32(desc);
3973 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3974 fn(cpu_env, pg, t_desc);
3975 tcg_temp_free_i32(t_desc);
3976 tcg_temp_free_ptr(pg);
3977 return true;
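/*
 * A sketch of the matching unpack, assuming the helper mirrors these
 * deposits after stripping SIMD_DATA_SHIFT with simd_data():
 */
static void sketch_unpack_fmla_desc(uint32_t data, unsigned *rd, unsigned *rn,
                                    unsigned *rm, unsigned *ra)
{
    *rd = extract32(data, 0, 5);
    *rn = extract32(data, 5, 5);
    *rm = extract32(data, 10, 5);
    *ra = extract32(data, 15, 5);
}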
3980 #define DO_FMLA(NAME, name) \
3981 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
3983 static gen_helper_sve_fmla * const fns[4] = { \
3984 NULL, gen_helper_sve_##name##_h, \
3985 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3986 }; \
3987 return do_fmla(s, a, fns[a->esz]); \
3990 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3991 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3992 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3993 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3995 #undef DO_FMLA
3997 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3999 static gen_helper_sve_fmla * const fns[3] = {
4000 gen_helper_sve_fcmla_zpzzz_h,
4001 gen_helper_sve_fcmla_zpzzz_s,
4002 gen_helper_sve_fcmla_zpzzz_d,
4005 if (a->esz == 0) {
4006 return false;
4008 if (sve_access_check(s)) {
4009 unsigned vsz = vec_full_reg_size(s);
4010 unsigned desc;
4011 TCGv_i32 t_desc;
4012 TCGv_ptr pg = tcg_temp_new_ptr();
4014 /* We would need 7 operands to pass these arguments "properly".
4015 * So we encode all the register numbers into the descriptor.
4017 desc = deposit32(a->rd, 5, 5, a->rn);
4018 desc = deposit32(desc, 10, 5, a->rm);
4019 desc = deposit32(desc, 15, 5, a->ra);
4020 desc = deposit32(desc, 20, 2, a->rot);
4021 desc = sextract32(desc, 0, 22);
4022 desc = simd_desc(vsz, vsz, desc);
4024 t_desc = tcg_const_i32(desc);
4025 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4026 fns[a->esz - 1](cpu_env, pg, t_desc);
4027 tcg_temp_free_i32(t_desc);
4028 tcg_temp_free_ptr(pg);
4030 return true;
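/*
 * Note on the sextract32 above: with rot in bits [21:20] the packed value
 * uses all 22 data bits, and simd_desc asserts that its data argument
 * round-trips through sextract32(data, 0, SIMD_DATA_BITS).  Sign-extending
 * bit 21 first keeps that assertion satisfied without losing information.
 */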
4033 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4035 static gen_helper_gvec_3_ptr * const fns[2] = {
4036 gen_helper_gvec_fcmlah_idx,
4037 gen_helper_gvec_fcmlas_idx,
4040 tcg_debug_assert(a->esz == 1 || a->esz == 2);
4041 tcg_debug_assert(a->rd == a->ra);
4042 if (sve_access_check(s)) {
4043 unsigned vsz = vec_full_reg_size(s);
4044 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4045 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4046 vec_full_reg_offset(s, a->rn),
4047 vec_full_reg_offset(s, a->rm),
4048 status, vsz, vsz,
4049 a->index * 4 + a->rot,
4050 fns[a->esz - 1]);
4051 tcg_temp_free_ptr(status);
4053 return true;
4057 *** SVE Floating Point Unary Operations Predicated Group
4060 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4061 bool is_fp16, gen_helper_gvec_3_ptr *fn)
4063 if (sve_access_check(s)) {
4064 unsigned vsz = vec_full_reg_size(s);
4065 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4066 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4067 vec_full_reg_offset(s, rn),
4068 pred_full_reg_offset(s, pg),
4069 status, vsz, vsz, 0, fn);
4070 tcg_temp_free_ptr(status);
4072 return true;
4075 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4077 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4080 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4082 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4085 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4087 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4090 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4092 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4095 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4097 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4100 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4102 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4105 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4107 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4110 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4112 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4115 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4117 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4120 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4122 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4125 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4127 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4130 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4132 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4135 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4137 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4140 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4142 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4145 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4147 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4150 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4152 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4155 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4157 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4160 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4162 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4165 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4167 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4170 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4172 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4175 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4176 gen_helper_sve_frint_h,
4177 gen_helper_sve_frint_s,
4178 gen_helper_sve_frint_d
4181 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4183 if (a->esz == 0) {
4184 return false;
4186 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4187 frint_fns[a->esz - 1]);
4190 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4192 static gen_helper_gvec_3_ptr * const fns[3] = {
4193 gen_helper_sve_frintx_h,
4194 gen_helper_sve_frintx_s,
4195 gen_helper_sve_frintx_d
4197 if (a->esz == 0) {
4198 return false;
4200 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4203 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4205 if (a->esz == 0) {
4206 return false;
4208 if (sve_access_check(s)) {
4209 unsigned vsz = vec_full_reg_size(s);
4210 TCGv_i32 tmode = tcg_const_i32(mode);
4211 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4213 gen_helper_set_rmode(tmode, tmode, status);
4215 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4216 vec_full_reg_offset(s, a->rn),
4217 pred_full_reg_offset(s, a->pg),
4218 status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4220 gen_helper_set_rmode(tmode, tmode, status);
4221 tcg_temp_free_i32(tmode);
4222 tcg_temp_free_ptr(status);
4224 return true;
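/*
 * Note on the paired calls above: gen_helper_set_rmode returns the
 * previous rounding mode into its destination, so the first call installs
 * the new mode while saving the old one in tmode, and the second call
 * restores it -- the usual save/swap/restore idiom in the A64 translator.
 */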
4227 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4229 return do_frint_mode(s, a, float_round_nearest_even);
4232 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4234 return do_frint_mode(s, a, float_round_up);
4237 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4239 return do_frint_mode(s, a, float_round_down);
4242 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4244 return do_frint_mode(s, a, float_round_to_zero);
4247 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4249 return do_frint_mode(s, a, float_round_ties_away);
4252 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4254 static gen_helper_gvec_3_ptr * const fns[3] = {
4255 gen_helper_sve_frecpx_h,
4256 gen_helper_sve_frecpx_s,
4257 gen_helper_sve_frecpx_d
4259 if (a->esz == 0) {
4260 return false;
4262 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4265 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4267 static gen_helper_gvec_3_ptr * const fns[3] = {
4268 gen_helper_sve_fsqrt_h,
4269 gen_helper_sve_fsqrt_s,
4270 gen_helper_sve_fsqrt_d
4272 if (a->esz == 0) {
4273 return false;
4275 return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4278 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4280 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4283 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4285 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4288 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4290 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4293 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4295 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4298 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4300 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4303 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4305 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4308 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4310 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4313 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4315 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4318 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4320 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4323 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4325 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4328 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4330 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4333 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4335 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4338 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4340 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4343 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4345 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4349 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4352 /* Subroutine loading a vector register at VOFS of LEN bytes.
4353 * The load should begin at the address Rn + IMM.
4356 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4358 int len_align = QEMU_ALIGN_DOWN(len, 8);
4359 int len_remain = len % 8;
4360 int nparts = len / 8 + ctpop8(len_remain);
4361 int midx = get_mem_index(s);
4362 TCGv_i64 addr, t0, t1;
4364 addr = tcg_temp_new_i64();
4365 t0 = tcg_temp_new_i64();
4367 /* Note that unpredicated load/store of vector/predicate registers
4368 * is defined as a stream of bytes, which equates to little-endian
4369 * operations on larger quantities. There is no nice way to force
4370 * a little-endian load for aarch64_be-linux-user out of line.
4372 * Attempt to keep code expansion to a minimum by limiting the
4373 * amount of unrolling done.
4375 if (nparts <= 4) {
4376 int i;
4378 for (i = 0; i < len_align; i += 8) {
4379 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4380 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4381 tcg_gen_st_i64(t0, cpu_env, vofs + i);
4383 } else {
4384 TCGLabel *loop = gen_new_label();
4385 TCGv_ptr tp, i = tcg_const_local_ptr(0);
4387 gen_set_label(loop);
4389 /* Minimize the number of local temps that must be re-read from
4390 * the stack each iteration. Instead, re-compute values other
4391 * than the loop counter.
4393 tp = tcg_temp_new_ptr();
4394 tcg_gen_addi_ptr(tp, i, imm);
4395 tcg_gen_extu_ptr_i64(addr, tp);
4396 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4398 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4400 tcg_gen_add_ptr(tp, cpu_env, i);
4401 tcg_gen_addi_ptr(i, i, 8);
4402 tcg_gen_st_i64(t0, tp, vofs);
4403 tcg_temp_free_ptr(tp);
4405 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4406 tcg_temp_free_ptr(i);
4409 /* Predicate register loads can be any multiple of 2.
4410 * Note that we still store the entire 64-bit unit into cpu_env.
4412 if (len_remain) {
4413 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4415 switch (len_remain) {
4416 case 2:
4417 case 4:
4418 case 8:
4419 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4420 break;
4422 case 6:
4423 t1 = tcg_temp_new_i64();
4424 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4425 tcg_gen_addi_i64(addr, addr, 4);
4426 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4427 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4428 tcg_temp_free_i64(t1);
4429 break;
4431 default:
4432 g_assert_not_reached();
4434 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4436 tcg_temp_free_i64(addr);
4437 tcg_temp_free_i64(t0);
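/* As a worked example of the remainder handling above: with a 384-bit
 * vector length the predicate registers are 6 bytes each, so len_align
 * is 0 and len_remain is 6, taking the split 4-byte + 2-byte load whose
 * halves are merged with the deposit.  For len_remain of 2, 4 or 8,
 * ctz32(len_remain) is exactly the MO_SIZE of the single load required.
 */
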
/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}

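/* The 6-byte remainder case of the store mirrors the load, except that
 * the high half is extracted with a shift rather than merged with a
 * deposit.
 */
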
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

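/* In all four cases the immediate is in units of the transfer size,
 * matching the "MUL VL" addressing form: e.g. with a 256-bit vector,
 * LDR z0, [x0, #2, MUL VL] loads 32 bytes from x0 + 64.
 */
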
/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};

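/* Worked example: dtype 6 is LD1H into .S elements -- dtype_mop[6] is
 * MO_UW (a zero-extending 16-bit access) and dtype_esz[6] is 2 (32-bit
 * elements), so each halfword loaded widens to one word in the vector.
 */
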
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}

static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc;

    /* For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    desc = sve_memopidx(s, dtype);
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}

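/* The data field of the descriptor built above therefore packs, from
 * the low bits up, the TCGMemOpIdx of the access and then the base
 * register number at MEMOPIDX_SHIFT; simd_desc() folds in the vector
 * length for the helper to recover via simd_oprsz().
 */
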
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}

static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

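/* The immediate counts whole (multi-)register transfers: e.g. for LD2H
 * with a 256-bit vector, elements is 16 and nreg is 1, so imm = 1
 * advances the base by 16 * 2 halfwords, i.e. 64 bytes.
 */
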
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= zt << MEMOPIDX_SHIFT;
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert which checks that no bits are
         * set within a predicate beyond VQ; since we have lowered VQ to 1
         * for this load operation, predicate bits that are legal for the
         * full vector length would otherwise trip it.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}

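/* The dup_mem above copies the quadword at dofs into every remaining
 * 16-byte lane (vece = 4, i.e. 128-bit elements), which implements the
 * replicating behaviour of LD1RQ for vector lengths above 128 bits.
 */
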
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, msz);
    }
    return true;
}

static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
    }
    return true;
}

/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}

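/* Note the fast path above: when the whole predicate fits in 8 bytes
 * (vector lengths up to 512 bits), a single 64-bit load ANDed with
 * pred_esz_masks[esz] tests all element flags at once; larger
 * predicates fall back to find_last_active.
 */
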
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}

static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

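/* Note that msz <= esz is required in both forms: a store may narrow
 * (e.g. ST1B of .D elements stores the low byte of each doubleword)
 * but never widen.
 */
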
/*
 *** SVE gather loads / scatter stores
 */

static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}

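/* Unlike do_mem_zpa, the descriptor's data field here carries the
 * offset scale (0, or log2 of the access size for the scaled forms)
 * rather than a register number, since all three vectors are already
 * passed as pointers.
 */
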
/* Indexed by [be][ff][xs][u][msz].  */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};

static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;

    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}

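/* For the MO_64 case, xs = 2 selects the 64-bit-offset column of the
 * table, so the vector-plus-immediate form reuses the same helpers as
 * the scalar-base plus 64-bit-vector-offset form, with the roles of
 * the two addends swapped.
 */
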
/* Indexed by [be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    int be = s->be_data == MO_BE;

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    int be = s->be_data == MO_BE;
    TCGv_i64 imm;

    if (a->esz < a->msz) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    imm = tcg_const_i64(a->imm << a->msz);
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
    tcg_temp_free_i64(imm);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

/*
 * Move Prefix
 *
 * TODO: The implementation so far could handle predicated merging movprfx.
 * The helper functions as written take an extra source register to
 * use in the operation, but the result is only written when predication
 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
 * to allow the final write back to the destination to be unconditional.
 * For predicated zeroing movprfx, we need to rearrange the helpers to
 * allow the final write back to zero inactives.
 *
 * In the meantime, just emit the moves.
 */

static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
    }
    return true;
}