target/arm: Implement SVE scatter stores
[qemu.git] target/arm/translate-sve.c

/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

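/* Worked example of the tszimm encoding: the combined { tsz : imm3 }
 * field places a single set bit in tsz whose position selects the
 * element size.  For byte elements (esz = 0), tsz = 0b0001, so x lies
 * in [8, 15]; tszimm_shr() then yields a right-shift count of 16 - x,
 * i.e. 1..8, and tszimm_shl() a left-shift count of x - 8, i.e. 0..7.
 */
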
static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

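/* For example, a 9-bit field of 0x101 expands to 0x100 (the low byte
 * 0x01 shifted left by 8), while 0x0ff expands to -1 in the signed
 * case and 0xff in the unsigned case.
 */
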
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

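/* The table above encodes dtype = msz * 5, which selects the unsigned
 * contiguous-load forms whose element size equals the memory size
 * (byte = 0, halfword = 5, word = 10, doubleword = 15).
 */
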
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

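/* The PredTest helpers pack their NZCV result into one word: bit 31
 * carries N (the first active element was true), bit 1 is nonzero when
 * Z is clear (some active element was true), and bit 0 carries C (the
 * last active element was false).  V is always zero, and since QEMU's
 * cpu_ZF holds "Z clear" as a nonzero value, the bare AND with 2 above
 * is sufficient.
 */
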
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

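/* A predicate register holds one bit per byte of vector, so for byte
 * elements every bit is significant (0xffff...), for halfwords every
 * second bit (0x5555...), for words every fourth (0x1111...), and for
 * doublewords every eighth (0x0101...).
 */
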
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

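/* For reference, DO_ZPZZ(ADD, add) expands to trans_ADD_zpzz(), which
 * dispatches on a->esz to gen_helper_sve_add_zpzz_{b,h,s,d}.  A NULL
 * slot, as in SDIV/UDIV above, rejects element sizes for which the
 * architecture allocates no encoding.
 */
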
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                               \
    static gen_helper_gvec_3 * const fns[4] = {                 \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,   \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,   \
    };                                                          \
    return do_zpz_ool(s, a, fns[a->esz]);                       \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.  */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less.  */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

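/* Worked example of the clamping above: for byte elements (esz = 0)
 * the element size in bits is 8 << 0 = 8.  ASR by 8 behaves like a
 * shift by 7 (only the sign bit survives either way), while LSR, LSL
 * and ASRD by 8 produce all-zero active elements, which is handled by
 * do_clr_zp().
 */
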
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

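/* Worked example: with a 256-bit vector length, vec_full_reg_size() is
 * 32 bytes and pred_full_reg_size() is 4 bytes, so ADDVL X0, X1, #2
 * computes X1 + 64 and ADDPL X0, X1, #2 computes X1 + 8.
 */
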
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

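/* All of the predicate operations above share one shape: AND, BIC, EOR,
 * ORR and ORN compute pd = op(pn, pm) AND pg; NOR and NAND compute
 * pd = pg AND NOT op(pn, pm); and SEL merges the inputs under the
 * guard, pd = (pn AND pg) OR (pm AND NOT pg).
 */
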
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

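/* Worked example: with fullsz = 32 bytes (a 256-bit vector) and
 * esz = 2 (word elements), elements = 8.  POW2 and VL8 both yield 8,
 * VL16 yields 0 because the bound exceeds the available elements,
 * MUL3 yields 6, and ALL yields 8.
 */
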
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

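/* Because the 32-bit input is widened before the add or subtract, the
 * true result always fits in 64 bits; a single movcond against the
 * relevant bound then clamps it.  E.g. for an unsigned increment, if
 * reg + val > UINT32_MAX, UINT32_MAX is stored, else the sum.
 */
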
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

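/* The signed cases use the classic xor trick: addition overflows iff
 * the operands have the same sign and the result's sign differs, while
 * subtraction overflows iff the operands' signs differ and the result's
 * sign differs from the minuend.  Since val is always positive here,
 * addition can only overflow toward INT64_MAX and subtraction toward
 * INT64_MIN, so a single bound suffices in each direction.
 */
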
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

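/* The 13-bit dbm field reuses the base AArch64 bitmask-immediate
 * encoding (N:immr:imms), which can express repeating patterns such as
 * 0x5555555555555555 or 0x00ff00ff00ff00ff but, notably, not all-ones.
 */
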
/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

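/* Worked example: with vsz = 32 and imm = 8, the result is bytes 8..31
 * of Zn followed by bytes 0..7 of Zm, i.e. a byte-granular extraction
 * from the concatenation of the two registers.  An immediate that
 * reaches past the vector degenerates to a plain copy of Zn (n_ofs = 0).
 */
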
/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

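/* The combined immediate encodes both element size and index: the
 * lowest set bit selects the size and the bits above it the index.
 * E.g. imm = 0b1010 gives esz = 1 (halfwords) and index = 0b10, i.e.
 * DUP Zd.H, Zn.H[2]; an index beyond the vector length yields zero.
 */
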
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

2176 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2177 gen_helper_gvec_2 *fn)
2179 if (!sve_access_check(s)) {
2180 return true;
2183 unsigned vsz = pred_full_reg_size(s);
2184 TCGv_ptr t_d = tcg_temp_new_ptr();
2185 TCGv_ptr t_n = tcg_temp_new_ptr();
2186 TCGv_i32 t_desc;
2187 int desc;
2189 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2190 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2192 /* Predicate sizes may be smaller and cannot use simd_desc.
2193 We cannot round up, as we do elsewhere, because we need
2194 the exact size for ZIP2 and REV. We retain the style for
2195 the other helpers for consistency. */
2197 desc = vsz - 2;
2198 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2199 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2200 t_desc = tcg_const_i32(desc);
2202 fn(t_d, t_n, t_desc);
2204 tcg_temp_free_i32(t_desc);
2205 tcg_temp_free_ptr(t_d);
2206 tcg_temp_free_ptr(t_n);
2207 return true;
2210 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2212 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2215 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2217 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2220 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2222 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2225 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2227 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2230 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2232 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2235 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2237 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2240 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2242 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2245 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2247 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2250 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2252 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2256 /* *** SVE Permute - Interleaving Group */
2259 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2261 static gen_helper_gvec_3 * const fns[4] = {
2262 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2263 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned high_ofs = high ? vsz / 2 : 0;
2269 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2270 vec_full_reg_offset(s, a->rn) + high_ofs,
2271 vec_full_reg_offset(s, a->rm) + high_ofs,
2272 vsz, vsz, 0, fns[a->esz]);
2274 return true;
2277 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2278 gen_helper_gvec_3 *fn)
2280 if (sve_access_check(s)) {
2281 unsigned vsz = vec_full_reg_size(s);
2282 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2283 vec_full_reg_offset(s, a->rn),
2284 vec_full_reg_offset(s, a->rm),
2285 vsz, vsz, data, fn);
2287 return true;
2290 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2292 return do_zip(s, a, false);
2295 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2297 return do_zip(s, a, true);
2300 static gen_helper_gvec_3 * const uzp_fns[4] = {
2301 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2302 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2305 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2307 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2310 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2312 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2315 static gen_helper_gvec_3 * const trn_fns[4] = {
2316 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2317 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2320 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2322 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2325 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2327 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2331 /* *** SVE Permute Vector - Predicated Group */
2334 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2336 static gen_helper_gvec_3 * const fns[4] = {
2337 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2339 return do_zpz_ool(s, a, fns[a->esz]);
2342 /* Call the helper that computes the ARM LastActiveElement pseudocode
2343 * function, scaled by the element size. This includes the not found
2344 * indication; e.g. not found for esz=3 is -8. */
2346 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2348 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2349 * round up, as we do elsewhere, because we need the exact size. */
2351 TCGv_ptr t_p = tcg_temp_new_ptr();
2352 TCGv_i32 t_desc;
2353 unsigned vsz = pred_full_reg_size(s);
2354 unsigned desc;
2356 desc = vsz - 2;
2357 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2359 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2360 t_desc = tcg_const_i32(desc);
2362 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2364 tcg_temp_free_i32(t_desc);
2365 tcg_temp_free_ptr(t_p);
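/* Worked example of the scaling: with 32-bit elements (esz = 2), a
 * last active element at index 5 yields the byte offset 5 << 2 = 20,
 * while no active element yields -(1 << 2) = -4; "not found" is thus
 * always a negative byte offset.
 */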
2368 /* Increment LAST to the offset of the next element in the vector,
2369 * wrapping around to 0. */
2371 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 unsigned vsz = vec_full_reg_size(s);
2375 tcg_gen_addi_i32(last, last, 1 << esz);
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz);
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2387 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2388 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 unsigned vsz = vec_full_reg_size(s);
2392 if (is_power_of_2(vsz)) {
2393 tcg_gen_andi_i32(last, last, vsz - 1);
2394 } else {
2395 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2396 TCGv_i32 zero = tcg_const_i32(0);
2397 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2398 tcg_temp_free_i32(max);
2399 tcg_temp_free_i32(zero);
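/* Worked examples for the two wrapping strategies above (the vector
 * sizes are hypothetical): with vsz = 32 and esz = 3, the not-found
 * value -8 from find_last_active is mapped by "& 31" to 24, the
 * offset of the last doubleword.  With vsz = 48 (not a power of 2)
 * masking cannot work, so the movcond selects vsz - 8 = 40 whenever
 * last < 0; incr_last_active similarly wraps 48 back to 0 via its
 * TCG_COND_GEU movcond.
 */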
2403 /* Load an unsigned element of ESZ from BASE+OFS. */
2404 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2406 TCGv_i64 r = tcg_temp_new_i64();
2408 switch (esz) {
2409 case 0:
2410 tcg_gen_ld8u_i64(r, base, ofs);
2411 break;
2412 case 1:
2413 tcg_gen_ld16u_i64(r, base, ofs);
2414 break;
2415 case 2:
2416 tcg_gen_ld32u_i64(r, base, ofs);
2417 break;
2418 case 3:
2419 tcg_gen_ld_i64(r, base, ofs);
2420 break;
2421 default:
2422 g_assert_not_reached();
2424 return r;
2427 /* Load an unsigned element of ESZ from RM[LAST]. */
2428 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2429 int rm, int esz)
2431 TCGv_ptr p = tcg_temp_new_ptr();
2432 TCGv_i64 r;
2434 /* Convert the offset within the vector into an offset within ENV.
2435 * The final adjustment for the vector register base
2436 * is added via a constant offset to the load. */
2438 #ifdef HOST_WORDS_BIGENDIAN
2439 /* Adjust for element ordering. See vec_reg_offset. */
2440 if (esz < 3) {
2441 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2443 #endif
2444 tcg_gen_ext_i32_ptr(p, last);
2445 tcg_gen_add_ptr(p, p, cpu_env);
2447 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2448 tcg_temp_free_ptr(p);
2450 return r;
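/* Sketch of the big-endian fixup above: host-endian storage mirrors
 * the byte offset of a sub-doubleword element within its 64-bit unit,
 * so for esz = 0 the byte at logical offset 3 lives at 3 ^ 7 = 4.
 * This is the same adjustment vec_reg_offset applies for a constant
 * element index.
 */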
2453 /* Compute CLAST for a Zreg. */
2454 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2456 TCGv_i32 last;
2457 TCGLabel *over;
2458 TCGv_i64 ele;
2459 unsigned vsz, esz = a->esz;
2461 if (!sve_access_check(s)) {
2462 return true;
2465 last = tcg_temp_local_new_i32();
2466 over = gen_new_label();
2468 find_last_active(s, last, esz, a->pg);
2470 /* There is of course no movcond for a 2048-bit vector,
2471 * so we must branch over the actual store. */
2473 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2475 if (!before) {
2476 incr_last_active(s, last, esz);
2479 ele = load_last_active(s, last, a->rm, esz);
2480 tcg_temp_free_i32(last);
2482 vsz = vec_full_reg_size(s);
2483 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2484 tcg_temp_free_i64(ele);
2486 /* If this insn used MOVPRFX, we may need a second move. */
2487 if (a->rd != a->rn) {
2488 TCGLabel *done = gen_new_label();
2489 tcg_gen_br(done);
2491 gen_set_label(over);
2492 do_mov_z(s, a->rd, a->rn);
2494 gen_set_label(done);
2495 } else {
2496 gen_set_label(over);
2498 return true;
2501 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2503 return do_clast_vector(s, a, false);
2506 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2508 return do_clast_vector(s, a, true);
2511 /* Compute CLAST for a scalar. */
2512 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2513 bool before, TCGv_i64 reg_val)
2515 TCGv_i32 last = tcg_temp_new_i32();
2516 TCGv_i64 ele, cmp, zero;
2518 find_last_active(s, last, esz, pg);
2520 /* Extend the original value of last prior to incrementing. */
2521 cmp = tcg_temp_new_i64();
2522 tcg_gen_ext_i32_i64(cmp, last);
2524 if (!before) {
2525 incr_last_active(s, last, esz);
2528 /* The conceit here is that while last < 0 indicates not found, after
2529 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2530 * from which we can load garbage. We then discard the garbage with
2531 * a conditional move. */
2533 ele = load_last_active(s, last, rm, esz);
2534 tcg_temp_free_i32(last);
2536 zero = tcg_const_i64(0);
2537 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2539 tcg_temp_free_i64(zero);
2540 tcg_temp_free_i64(cmp);
2541 tcg_temp_free_i64(ele);
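/* Illustration of the discard: for CLASTA.D (esz = 3) with no active
 * elements, cmp holds -8, the TCG_COND_GE test fails, and the movcond
 * keeps the prior reg_val while the garbage loaded from zregs[rm] is
 * thrown away.
 */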
2544 /* Compute CLAST for a Vreg. */
2545 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2547 if (sve_access_check(s)) {
2548 int esz = a->esz;
2549 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2550 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2552 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2553 write_fp_dreg(s, a->rd, reg);
2554 tcg_temp_free_i64(reg);
2556 return true;
2559 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2561 return do_clast_fp(s, a, false);
2564 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2566 return do_clast_fp(s, a, true);
2569 /* Compute CLAST for a Xreg. */
2570 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2572 TCGv_i64 reg;
2574 if (!sve_access_check(s)) {
2575 return true;
2578 reg = cpu_reg(s, a->rd);
2579 switch (a->esz) {
2580 case 0:
2581 tcg_gen_ext8u_i64(reg, reg);
2582 break;
2583 case 1:
2584 tcg_gen_ext16u_i64(reg, reg);
2585 break;
2586 case 2:
2587 tcg_gen_ext32u_i64(reg, reg);
2588 break;
2589 case 3:
2590 break;
2591 default:
2592 g_assert_not_reached();
2595 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2596 return true;
2599 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2601 return do_clast_general(s, a, false);
2604 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2606 return do_clast_general(s, a, true);
2609 /* Compute LAST for a scalar. */
2610 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2611 int pg, int rm, bool before)
2613 TCGv_i32 last = tcg_temp_new_i32();
2614 TCGv_i64 ret;
2616 find_last_active(s, last, esz, pg);
2617 if (before) {
2618 wrap_last_active(s, last, esz);
2619 } else {
2620 incr_last_active(s, last, esz);
2623 ret = load_last_active(s, last, rm, esz);
2624 tcg_temp_free_i32(last);
2625 return ret;
2628 /* Compute LAST for a Vreg. */
2629 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631 if (sve_access_check(s)) {
2632 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2633 write_fp_dreg(s, a->rd, val);
2634 tcg_temp_free_i64(val);
2636 return true;
2639 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2641 return do_last_fp(s, a, false);
2644 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646 return do_last_fp(s, a, true);
2649 /* Compute LAST for a Xreg. */
2650 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2652 if (sve_access_check(s)) {
2653 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2654 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2655 tcg_temp_free_i64(val);
2657 return true;
2660 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2662 return do_last_general(s, a, false);
2665 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2667 return do_last_general(s, a, true);
2670 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2672 if (sve_access_check(s)) {
2673 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2675 return true;
2678 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2680 if (sve_access_check(s)) {
2681 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2682 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2683 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2684 tcg_temp_free_i64(t);
2686 return true;
2689 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 NULL,
2693 gen_helper_sve_revb_h,
2694 gen_helper_sve_revb_s,
2695 gen_helper_sve_revb_d,
2697 return do_zpz_ool(s, a, fns[a->esz]);
2700 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 NULL,
2704 NULL,
2705 gen_helper_sve_revh_s,
2706 gen_helper_sve_revh_d,
2708 return do_zpz_ool(s, a, fns[a->esz]);
2711 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2713 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2716 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2718 static gen_helper_gvec_3 * const fns[4] = {
2719 gen_helper_sve_rbit_b,
2720 gen_helper_sve_rbit_h,
2721 gen_helper_sve_rbit_s,
2722 gen_helper_sve_rbit_d,
2724 return do_zpz_ool(s, a, fns[a->esz]);
2727 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2729 if (sve_access_check(s)) {
2730 unsigned vsz = vec_full_reg_size(s);
2731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2732 vec_full_reg_offset(s, a->rn),
2733 vec_full_reg_offset(s, a->rm),
2734 pred_full_reg_offset(s, a->pg),
2735 vsz, vsz, a->esz, gen_helper_sve_splice);
2737 return true;
2741 /* *** SVE Integer Compare - Vectors Group */
2744 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2745 gen_helper_gvec_flags_4 *gen_fn)
2747 TCGv_ptr pd, zn, zm, pg;
2748 unsigned vsz;
2749 TCGv_i32 t;
2751 if (gen_fn == NULL) {
2752 return false;
2754 if (!sve_access_check(s)) {
2755 return true;
2758 vsz = vec_full_reg_size(s);
2759 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2760 pd = tcg_temp_new_ptr();
2761 zn = tcg_temp_new_ptr();
2762 zm = tcg_temp_new_ptr();
2763 pg = tcg_temp_new_ptr();
2765 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2766 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2767 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2768 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2770 gen_fn(t, pd, zn, zm, pg, t);
2772 tcg_temp_free_ptr(pd);
2773 tcg_temp_free_ptr(zn);
2774 tcg_temp_free_ptr(zm);
2775 tcg_temp_free_ptr(pg);
2777 do_pred_flags(t);
2779 tcg_temp_free_i32(t);
2780 return true;
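/* Note that T does double duty above: it carries the simd_desc into
 * the helper as the final argument and carries the predicate test
 * result back out as the return value, which do_pred_flags then
 * unpacks into the NZCV flags.
 */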
2783 #define DO_PPZZ(NAME, name) \
2784 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2785 uint32_t insn) \
2787 static gen_helper_gvec_flags_4 * const fns[4] = { \
2788 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2789 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2790 }; \
2791 return do_ppzz_flags(s, a, fns[a->esz]); \
2794 DO_PPZZ(CMPEQ, cmpeq)
2795 DO_PPZZ(CMPNE, cmpne)
2796 DO_PPZZ(CMPGT, cmpgt)
2797 DO_PPZZ(CMPGE, cmpge)
2798 DO_PPZZ(CMPHI, cmphi)
2799 DO_PPZZ(CMPHS, cmphs)
2801 #undef DO_PPZZ
2803 #define DO_PPZW(NAME, name) \
2804 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2805 uint32_t insn) \
2807 static gen_helper_gvec_flags_4 * const fns[4] = { \
2808 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2809 gen_helper_sve_##name##_ppzw_s, NULL \
2810 }; \
2811 return do_ppzz_flags(s, a, fns[a->esz]); \
2814 DO_PPZW(CMPEQ, cmpeq)
2815 DO_PPZW(CMPNE, cmpne)
2816 DO_PPZW(CMPGT, cmpgt)
2817 DO_PPZW(CMPGE, cmpge)
2818 DO_PPZW(CMPHI, cmphi)
2819 DO_PPZW(CMPHS, cmphs)
2820 DO_PPZW(CMPLT, cmplt)
2821 DO_PPZW(CMPLE, cmple)
2822 DO_PPZW(CMPLO, cmplo)
2823 DO_PPZW(CMPLS, cmpls)
2825 #undef DO_PPZW
2828 /* *** SVE Integer Compare - Immediate Groups */
2831 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2832 gen_helper_gvec_flags_3 *gen_fn)
2834 TCGv_ptr pd, zn, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2838 if (gen_fn == NULL) {
2839 return false;
2841 if (!sve_access_check(s)) {
2842 return true;
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 pg = tcg_temp_new_ptr();
2851 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2852 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2855 gen_fn(t, pd, zn, pg, t);
2857 tcg_temp_free_ptr(pd);
2858 tcg_temp_free_ptr(zn);
2859 tcg_temp_free_ptr(pg);
2861 do_pred_flags(t);
2863 tcg_temp_free_i32(t);
2864 return true;
2867 #define DO_PPZI(NAME, name) \
2868 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2869 uint32_t insn) \
2871 static gen_helper_gvec_flags_3 * const fns[4] = { \
2872 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2873 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2874 }; \
2875 return do_ppzi_flags(s, a, fns[a->esz]); \
2878 DO_PPZI(CMPEQ, cmpeq)
2879 DO_PPZI(CMPNE, cmpne)
2880 DO_PPZI(CMPGT, cmpgt)
2881 DO_PPZI(CMPGE, cmpge)
2882 DO_PPZI(CMPHI, cmphi)
2883 DO_PPZI(CMPHS, cmphs)
2884 DO_PPZI(CMPLT, cmplt)
2885 DO_PPZI(CMPLE, cmple)
2886 DO_PPZI(CMPLO, cmplo)
2887 DO_PPZI(CMPLS, cmpls)
2889 #undef DO_PPZI
2892 /* *** SVE Partition Break Group */
2895 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2896 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2898 if (!sve_access_check(s)) {
2899 return true;
2902 unsigned vsz = pred_full_reg_size(s);
2904 /* Predicate sizes may be smaller and cannot use simd_desc. */
2905 TCGv_ptr d = tcg_temp_new_ptr();
2906 TCGv_ptr n = tcg_temp_new_ptr();
2907 TCGv_ptr m = tcg_temp_new_ptr();
2908 TCGv_ptr g = tcg_temp_new_ptr();
2909 TCGv_i32 t = tcg_const_i32(vsz - 2);
2911 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2912 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2913 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2914 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2916 if (a->s) {
2917 fn_s(t, d, n, m, g, t);
2918 do_pred_flags(t);
2919 } else {
2920 fn(d, n, m, g, t);
2922 tcg_temp_free_ptr(d);
2923 tcg_temp_free_ptr(n);
2924 tcg_temp_free_ptr(m);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2927 return true;
2930 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2931 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2933 if (!sve_access_check(s)) {
2934 return true;
2937 unsigned vsz = pred_full_reg_size(s);
2939 /* Predicate sizes may be smaller and cannot use simd_desc. */
2940 TCGv_ptr d = tcg_temp_new_ptr();
2941 TCGv_ptr n = tcg_temp_new_ptr();
2942 TCGv_ptr g = tcg_temp_new_ptr();
2943 TCGv_i32 t = tcg_const_i32(vsz - 2);
2945 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2946 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2947 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2949 if (a->s) {
2950 fn_s(t, d, n, g, t);
2951 do_pred_flags(t);
2952 } else {
2953 fn(d, n, g, t);
2955 tcg_temp_free_ptr(d);
2956 tcg_temp_free_ptr(n);
2957 tcg_temp_free_ptr(g);
2958 tcg_temp_free_i32(t);
2959 return true;
2962 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2964 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2967 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2969 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2972 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2974 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2977 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2979 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2982 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2984 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2987 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2989 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2992 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2994 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2998 /* *** SVE Predicate Count Group */
3001 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3003 unsigned psz = pred_full_reg_size(s);
3005 if (psz <= 8) {
3006 uint64_t psz_mask;
3008 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3009 if (pn != pg) {
3010 TCGv_i64 g = tcg_temp_new_i64();
3011 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3012 tcg_gen_and_i64(val, val, g);
3013 tcg_temp_free_i64(g);
3016 /* Reduce the pred_esz_masks value simply to reduce the
3017 * size of the code generated here. */
3019 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3020 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3022 tcg_gen_ctpop_i64(val, val);
3023 } else {
3024 TCGv_ptr t_pn = tcg_temp_new_ptr();
3025 TCGv_ptr t_pg = tcg_temp_new_ptr();
3026 unsigned desc;
3027 TCGv_i32 t_desc;
3029 desc = psz - 2;
3030 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3032 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3033 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3034 t_desc = tcg_const_i32(desc);
3036 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3037 tcg_temp_free_ptr(t_pn);
3038 tcg_temp_free_ptr(t_pg);
3039 tcg_temp_free_i32(t_desc);
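/* Worked example of the inline path (sizes are hypothetical): with a
 * 4-byte predicate (256-bit vectors) counting halfword elements
 * (esz = 1), the constant folds to
 *
 *     pred_esz_masks[1] & MAKE_64BIT_MASK(0, 32) == 0x55555555
 *
 * leaving one bit per active halfword element for the single ctpop.
 */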
3043 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3045 if (sve_access_check(s)) {
3046 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3048 return true;
3051 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3052 uint32_t insn)
3054 if (sve_access_check(s)) {
3055 TCGv_i64 reg = cpu_reg(s, a->rd);
3056 TCGv_i64 val = tcg_temp_new_i64();
3058 do_cntp(s, val, a->esz, a->pg, a->pg);
3059 if (a->d) {
3060 tcg_gen_sub_i64(reg, reg, val);
3061 } else {
3062 tcg_gen_add_i64(reg, reg, val);
3064 tcg_temp_free_i64(val);
3066 return true;
3069 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3070 uint32_t insn)
3072 if (a->esz == 0) {
3073 return false;
3075 if (sve_access_check(s)) {
3076 unsigned vsz = vec_full_reg_size(s);
3077 TCGv_i64 val = tcg_temp_new_i64();
3078 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3082 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3084 return true;
3087 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3088 uint32_t insn)
3090 if (sve_access_check(s)) {
3091 TCGv_i64 reg = cpu_reg(s, a->rd);
3092 TCGv_i64 val = tcg_temp_new_i64();
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_32(reg, val, a->u, a->d);
3097 return true;
3100 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3101 uint32_t insn)
3103 if (sve_access_check(s)) {
3104 TCGv_i64 reg = cpu_reg(s, a->rd);
3105 TCGv_i64 val = tcg_temp_new_i64();
3107 do_cntp(s, val, a->esz, a->pg, a->pg);
3108 do_sat_addsub_64(reg, val, a->u, a->d);
3110 return true;
3113 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3114 uint32_t insn)
3116 if (a->esz == 0) {
3117 return false;
3119 if (sve_access_check(s)) {
3120 TCGv_i64 val = tcg_temp_new_i64();
3121 do_cntp(s, val, a->esz, a->pg, a->pg);
3122 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3124 return true;
3128 /* *** SVE Integer Compare Scalars Group */
3131 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3133 if (!sve_access_check(s)) {
3134 return true;
3137 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140 TCGv_i64 cmp = tcg_temp_new_i64();
3142 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144 tcg_temp_free_i64(cmp);
3146 /* VF = !NF & !CF. */
3147 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3150 /* Both NF and VF actually look at bit 31. */
3151 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3153 return true;
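/* Example of the flag trick: for CTERMEQ with Rn == Rm, setcond gives
 * NF = 1 and VF = !NF & !CF = 0; the final negations turn the boolean
 * NF into 0x80000000 so that bit 31, the bit the flag-reading code
 * actually tests, is properly set.
 */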
3156 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3158 if (!sve_access_check(s)) {
3159 return true;
3162 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3163 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3164 TCGv_i64 t0 = tcg_temp_new_i64();
3165 TCGv_i64 t1 = tcg_temp_new_i64();
3166 TCGv_i32 t2, t3;
3167 TCGv_ptr ptr;
3168 unsigned desc, vsz = vec_full_reg_size(s);
3169 TCGCond cond;
3171 if (!a->sf) {
3172 if (a->u) {
3173 tcg_gen_ext32u_i64(op0, op0);
3174 tcg_gen_ext32u_i64(op1, op1);
3175 } else {
3176 tcg_gen_ext32s_i64(op0, op0);
3177 tcg_gen_ext32s_i64(op1, op1);
3181 /* For the helper, compress the different conditions into a computation
3182 * of the number of iterations for which the condition is true.
3184 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3185 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3186 * aren't that large, so any value >= the predicate size is sufficient. */
3188 tcg_gen_sub_i64(t0, op1, op0);
3190 /* t0 = MIN(op1 - op0, vsz). */
3191 tcg_gen_movi_i64(t1, vsz);
3192 tcg_gen_umin_i64(t0, t0, t1);
3193 if (a->eq) {
3194 /* Equality means one more iteration. */
3195 tcg_gen_addi_i64(t0, t0, 1);
3198 /* t0 = (condition true ? t0 : 0). */
3199 cond = (a->u
3200 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3201 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3202 tcg_gen_movi_i64(t1, 0);
3203 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3205 t2 = tcg_temp_new_i32();
3206 tcg_gen_extrl_i64_i32(t2, t0);
3207 tcg_temp_free_i64(t0);
3208 tcg_temp_free_i64(t1);
3210 desc = (vsz / 8) - 2;
3211 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3212 t3 = tcg_const_i32(desc);
3214 ptr = tcg_temp_new_ptr();
3215 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3217 gen_helper_sve_while(t2, ptr, t2, t3);
3218 do_pred_flags(t2);
3220 tcg_temp_free_ptr(ptr);
3221 tcg_temp_free_i32(t2);
3222 tcg_temp_free_i32(t3);
3223 return true;
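/* Worked example of the iteration-count computation: for
 * WHILELT Pd.S, X0, X1 with X0 = 5 and X1 = 8, t0 = MIN(8 - 5, vsz)
 * = 3 and the LT condition holds, so the helper receives 3 and sets
 * the first three .S elements of Pd; with X0 >= X1 the movcond
 * forces 0 and Pd becomes all-false.
 */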
3227 /* *** SVE Integer Wide Immediate - Unpredicated Group */
3230 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3232 if (a->esz == 0) {
3233 return false;
3235 if (sve_access_check(s)) {
3236 unsigned vsz = vec_full_reg_size(s);
3237 int dofs = vec_full_reg_offset(s, a->rd);
3238 uint64_t imm;
3240 /* Decode the VFP immediate. */
3241 imm = vfp_expand_imm(a->esz, a->imm);
3242 imm = dup_const(a->esz, imm);
3244 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3246 return true;
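/* Immediate expansion example (values taken from the FMOV-style
 * immediate encoding): FDUP Z0.S, #1.0 carries imm8 = 0x70, which
 * vfp_expand_imm widens to 0x3f800000 and dup_const then replicates
 * to 0x3f8000003f800000 for the 64-bit dup.
 */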
3249 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3251 if (a->esz == 0 && extract32(insn, 13, 1)) {
3252 return false;
3254 if (sve_access_check(s)) {
3255 unsigned vsz = vec_full_reg_size(s);
3256 int dofs = vec_full_reg_offset(s, a->rd);
3258 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3260 return true;
3263 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3265 if (a->esz == 0 && extract32(insn, 13, 1)) {
3266 return false;
3268 if (sve_access_check(s)) {
3269 unsigned vsz = vec_full_reg_size(s);
3270 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3271 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3273 return true;
3276 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3278 a->imm = -a->imm;
3279 return trans_ADD_zzi(s, a, insn);
3282 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3284 static const GVecGen2s op[4] = {
3285 { .fni8 = tcg_gen_vec_sub8_i64,
3286 .fniv = tcg_gen_sub_vec,
3287 .fno = gen_helper_sve_subri_b,
3288 .opc = INDEX_op_sub_vec,
3289 .vece = MO_8,
3290 .scalar_first = true },
3291 { .fni8 = tcg_gen_vec_sub16_i64,
3292 .fniv = tcg_gen_sub_vec,
3293 .fno = gen_helper_sve_subri_h,
3294 .opc = INDEX_op_sub_vec,
3295 .vece = MO_16,
3296 .scalar_first = true },
3297 { .fni4 = tcg_gen_sub_i32,
3298 .fniv = tcg_gen_sub_vec,
3299 .fno = gen_helper_sve_subri_s,
3300 .opc = INDEX_op_sub_vec,
3301 .vece = MO_32,
3302 .scalar_first = true },
3303 { .fni8 = tcg_gen_sub_i64,
3304 .fniv = tcg_gen_sub_vec,
3305 .fno = gen_helper_sve_subri_d,
3306 .opc = INDEX_op_sub_vec,
3307 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3308 .vece = MO_64,
3309 .scalar_first = true }
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3313 return false;
3315 if (sve_access_check(s)) {
3316 unsigned vsz = vec_full_reg_size(s);
3317 TCGv_i64 c = tcg_const_i64(a->imm);
3318 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3319 vec_full_reg_offset(s, a->rn),
3320 vsz, vsz, c, &op[a->esz]);
3321 tcg_temp_free_i64(c);
3323 return true;
3326 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3328 if (sve_access_check(s)) {
3329 unsigned vsz = vec_full_reg_size(s);
3330 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3331 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3333 return true;
3336 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3337 bool u, bool d)
3339 if (a->esz == 0 && extract32(insn, 13, 1)) {
3340 return false;
3342 if (sve_access_check(s)) {
3343 TCGv_i64 val = tcg_const_i64(a->imm);
3344 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3345 tcg_temp_free_i64(val);
3347 return true;
3350 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3352 return do_zzi_sat(s, a, insn, false, false);
3355 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3357 return do_zzi_sat(s, a, insn, true, false);
3360 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3362 return do_zzi_sat(s, a, insn, false, true);
3365 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3367 return do_zzi_sat(s, a, insn, true, true);
3370 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3372 if (sve_access_check(s)) {
3373 unsigned vsz = vec_full_reg_size(s);
3374 TCGv_i64 c = tcg_const_i64(a->imm);
3376 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3377 vec_full_reg_offset(s, a->rn),
3378 c, vsz, vsz, 0, fn);
3379 tcg_temp_free_i64(c);
3381 return true;
3384 #define DO_ZZI(NAME, name) \
3385 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3386 uint32_t insn) \
3388 static gen_helper_gvec_2i * const fns[4] = { \
3389 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3390 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3391 }; \
3392 return do_zzi_ool(s, a, fns[a->esz]); \
3395 DO_ZZI(SMAX, smax)
3396 DO_ZZI(UMAX, umax)
3397 DO_ZZI(SMIN, smin)
3398 DO_ZZI(UMIN, umin)
3400 #undef DO_ZZI
3403 /* *** SVE Floating Point Accumulating Reduction Group */
3406 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3408 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3409 TCGv_ptr, TCGv_ptr, TCGv_i32);
3410 static fadda_fn * const fns[3] = {
3411 gen_helper_sve_fadda_h,
3412 gen_helper_sve_fadda_s,
3413 gen_helper_sve_fadda_d,
3415 unsigned vsz = vec_full_reg_size(s);
3416 TCGv_ptr t_rm, t_pg, t_fpst;
3417 TCGv_i64 t_val;
3418 TCGv_i32 t_desc;
3420 if (a->esz == 0) {
3421 return false;
3423 if (!sve_access_check(s)) {
3424 return true;
3427 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3428 t_rm = tcg_temp_new_ptr();
3429 t_pg = tcg_temp_new_ptr();
3430 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3431 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3432 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3433 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3435 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3437 tcg_temp_free_i32(t_desc);
3438 tcg_temp_free_ptr(t_fpst);
3439 tcg_temp_free_ptr(t_pg);
3440 tcg_temp_free_ptr(t_rm);
3442 write_fp_dreg(s, a->rd, t_val);
3443 tcg_temp_free_i64(t_val);
3444 return true;
3448 /* *** SVE Floating Point Arithmetic - Unpredicated Group */
3451 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3452 gen_helper_gvec_3_ptr *fn)
3454 if (fn == NULL) {
3455 return false;
3457 if (sve_access_check(s)) {
3458 unsigned vsz = vec_full_reg_size(s);
3459 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3460 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3461 vec_full_reg_offset(s, a->rn),
3462 vec_full_reg_offset(s, a->rm),
3463 status, vsz, vsz, 0, fn);
3464 tcg_temp_free_ptr(status);
3466 return true;
3470 #define DO_FP3(NAME, name) \
3471 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3473 static gen_helper_gvec_3_ptr * const fns[4] = { \
3474 NULL, gen_helper_gvec_##name##_h, \
3475 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3476 }; \
3477 return do_zzz_fp(s, a, fns[a->esz]); \
3480 DO_FP3(FADD_zzz, fadd)
3481 DO_FP3(FSUB_zzz, fsub)
3482 DO_FP3(FMUL_zzz, fmul)
3483 DO_FP3(FTSMUL, ftsmul)
3484 DO_FP3(FRECPS, recps)
3485 DO_FP3(FRSQRTS, rsqrts)
3487 #undef DO_FP3
3490 /* *** SVE Floating Point Arithmetic - Predicated Group */
3493 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3494 gen_helper_gvec_4_ptr *fn)
3496 if (fn == NULL) {
3497 return false;
3499 if (sve_access_check(s)) {
3500 unsigned vsz = vec_full_reg_size(s);
3501 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3503 vec_full_reg_offset(s, a->rn),
3504 vec_full_reg_offset(s, a->rm),
3505 pred_full_reg_offset(s, a->pg),
3506 status, vsz, vsz, 0, fn);
3507 tcg_temp_free_ptr(status);
3509 return true;
3512 #define DO_FP3(NAME, name) \
3513 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3515 static gen_helper_gvec_4_ptr * const fns[4] = { \
3516 NULL, gen_helper_sve_##name##_h, \
3517 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3518 }; \
3519 return do_zpzz_fp(s, a, fns[a->esz]); \
3522 DO_FP3(FADD_zpzz, fadd)
3523 DO_FP3(FSUB_zpzz, fsub)
3524 DO_FP3(FMUL_zpzz, fmul)
3525 DO_FP3(FMIN_zpzz, fmin)
3526 DO_FP3(FMAX_zpzz, fmax)
3527 DO_FP3(FMINNM_zpzz, fminnum)
3528 DO_FP3(FMAXNM_zpzz, fmaxnum)
3529 DO_FP3(FABD, fabd)
3530 DO_FP3(FSCALE, fscalbn)
3531 DO_FP3(FDIV, fdiv)
3532 DO_FP3(FMULX, fmulx)
3534 #undef DO_FP3
3536 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3538 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3540 if (fn == NULL) {
3541 return false;
3543 if (!sve_access_check(s)) {
3544 return true;
3547 unsigned vsz = vec_full_reg_size(s);
3548 unsigned desc;
3549 TCGv_i32 t_desc;
3550 TCGv_ptr pg = tcg_temp_new_ptr();
3552 /* We would need 7 operands to pass these arguments "properly".
3553 * So we encode all the register numbers into the descriptor. */
3555 desc = deposit32(a->rd, 5, 5, a->rn);
3556 desc = deposit32(desc, 10, 5, a->rm);
3557 desc = deposit32(desc, 15, 5, a->ra);
3558 desc = simd_desc(vsz, vsz, desc);
3560 t_desc = tcg_const_i32(desc);
3561 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3562 fn(cpu_env, pg, t_desc);
3563 tcg_temp_free_i32(t_desc);
3564 tcg_temp_free_ptr(pg);
3565 return true;
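/* Sketch of the matching helper-side unpacking (the real decode lives
 * in sve_helper.c):
 *
 *     unsigned data = simd_data(desc);
 *     unsigned rd = extract32(data, 0, 5);
 *     unsigned rn = extract32(data, 5, 5);
 *     unsigned rm = extract32(data, 10, 5);
 *     unsigned ra = extract32(data, 15, 5);
 *
 * i.e. exactly the inverse of the deposits above.
 */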
3568 #define DO_FMLA(NAME, name) \
3569 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3571 static gen_helper_sve_fmla * const fns[4] = { \
3572 NULL, gen_helper_sve_##name##_h, \
3573 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3574 }; \
3575 return do_fmla(s, a, fns[a->esz]); \
3578 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3579 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3580 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3581 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3583 #undef DO_FMLA
3586 /* *** SVE Floating Point Unary Operations Predicated Group */
3589 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3590 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3592 if (sve_access_check(s)) {
3593 unsigned vsz = vec_full_reg_size(s);
3594 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3595 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3596 vec_full_reg_offset(s, rn),
3597 pred_full_reg_offset(s, pg),
3598 status, vsz, vsz, 0, fn);
3599 tcg_temp_free_ptr(status);
3601 return true;
3604 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3606 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3609 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3611 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3614 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3616 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3619 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3621 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3624 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3626 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3629 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3631 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3634 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3636 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3639 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3641 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3644 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3646 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3649 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3651 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3654 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3656 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3659 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3661 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3664 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3666 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3669 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3671 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3675 /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group */
3678 /* Subroutine loading a vector register at VOFS of LEN bytes.
3679 * The load should begin at the address Rn + IMM. */
3682 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3683 int rn, int imm)
3685 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3686 uint32_t len_remain = len % 8;
3687 uint32_t nparts = len / 8 + ctpop8(len_remain);
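/* len_remain is 0, 2, 4 or 6; a 6-byte tail takes two memory ops
 * (4 + 2), so ctpop8(len_remain) is the number of tail operations
 * and nparts the total number of memory operations needed.
 */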
3688 int midx = get_mem_index(s);
3689 TCGv_i64 addr, t0, t1;
3691 addr = tcg_temp_new_i64();
3692 t0 = tcg_temp_new_i64();
3694 /* Note that unpredicated load/store of vector/predicate registers
3695 * are defined as a stream of bytes, which equates to little-endian
3696 * operations on larger quantities. There is no nice way to force
3697 * a little-endian load for aarch64_be-linux-user out of line.
3699 * Attempt to keep code expansion to a minimum by limiting the
3700 * amount of unrolling done. */
3702 if (nparts <= 4) {
3703 int i;
3705 for (i = 0; i < len_align; i += 8) {
3706 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3707 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3708 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3710 } else {
3711 TCGLabel *loop = gen_new_label();
3712 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3714 gen_set_label(loop);
3716 /* Minimize the number of local temps that must be re-read from
3717 * the stack each iteration. Instead, re-compute values other
3718 * than the loop counter. */
3720 tp = tcg_temp_new_ptr();
3721 tcg_gen_addi_ptr(tp, i, imm);
3722 tcg_gen_extu_ptr_i64(addr, tp);
3723 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3725 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3727 tcg_gen_add_ptr(tp, cpu_env, i);
3728 tcg_gen_addi_ptr(i, i, 8);
3729 tcg_gen_st_i64(t0, tp, vofs);
3730 tcg_temp_free_ptr(tp);
3732 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3733 tcg_temp_free_ptr(i);
3736 /* Predicate register loads can be any multiple of 2.
3737 * Note that we still store the entire 64-bit unit into cpu_env. */
3739 if (len_remain) {
3740 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3742 switch (len_remain) {
3743 case 2:
3744 case 4:
3745 case 8:
3746 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3747 break;
3749 case 6:
3750 t1 = tcg_temp_new_i64();
3751 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3752 tcg_gen_addi_i64(addr, addr, 4);
3753 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3754 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3755 tcg_temp_free_i64(t1);
3756 break;
3758 default:
3759 g_assert_not_reached();
3761 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3763 tcg_temp_free_i64(addr);
3764 tcg_temp_free_i64(t0);
3767 /* Similarly for stores. */
3768 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3769 int rn, int imm)
3771 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3772 uint32_t len_remain = len % 8;
3773 uint32_t nparts = len / 8 + ctpop8(len_remain);
3774 int midx = get_mem_index(s);
3775 TCGv_i64 addr, t0;
3777 addr = tcg_temp_new_i64();
3778 t0 = tcg_temp_new_i64();
3780 /* Note that unpredicated load/store of vector/predicate registers
3781 * are defined as a stream of bytes, which equates to little-endian
3782 * operations on larger quantities. There is no nice way to force
3783 * a little-endian store for aarch64_be-linux-user out of line.
3785 * Attempt to keep code expansion to a minimum by limiting the
3786 * amount of unrolling done. */
3788 if (nparts <= 4) {
3789 int i;
3791 for (i = 0; i < len_align; i += 8) {
3792 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3793 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3794 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3796 } else {
3797 TCGLabel *loop = gen_new_label();
3798 TCGv_ptr t2, i = tcg_const_local_ptr(0);
3800 gen_set_label(loop);
3802 t2 = tcg_temp_new_ptr();
3803 tcg_gen_add_ptr(t2, cpu_env, i);
3804 tcg_gen_ld_i64(t0, t2, vofs);
3806 /* Minimize the number of local temps that must be re-read from
3807 * the stack each iteration. Instead, re-compute values other
3808 * than the loop counter. */
3810 tcg_gen_addi_ptr(t2, i, imm);
3811 tcg_gen_extu_ptr_i64(addr, t2);
3812 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3813 tcg_temp_free_ptr(t2);
3815 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3817 tcg_gen_addi_ptr(i, i, 8);
3819 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3820 tcg_temp_free_ptr(i);
3823 /* Predicate register stores can be any multiple of 2. */
3824 if (len_remain) {
3825 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3826 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3828 switch (len_remain) {
3829 case 2:
3830 case 4:
3831 case 8:
3832 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3833 break;
3835 case 6:
3836 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
3837 tcg_gen_addi_i64(addr, addr, 4);
3838 tcg_gen_shri_i64(t0, t0, 32);
3839 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
3840 break;
3842 default:
3843 g_assert_not_reached();
3846 tcg_temp_free_i64(addr);
3847 tcg_temp_free_i64(t0);
3850 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3852 if (sve_access_check(s)) {
3853 int size = vec_full_reg_size(s);
3854 int off = vec_full_reg_offset(s, a->rd);
3855 do_ldr(s, off, size, a->rn, a->imm * size);
3857 return true;
3860 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3862 if (sve_access_check(s)) {
3863 int size = pred_full_reg_size(s);
3864 int off = pred_full_reg_offset(s, a->rd);
3865 do_ldr(s, off, size, a->rn, a->imm * size);
3867 return true;
3870 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3872 if (sve_access_check(s)) {
3873 int size = vec_full_reg_size(s);
3874 int off = vec_full_reg_offset(s, a->rd);
3875 do_str(s, off, size, a->rn, a->imm * size);
3877 return true;
3880 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3882 if (sve_access_check(s)) {
3883 int size = pred_full_reg_size(s);
3884 int off = pred_full_reg_offset(s, a->rd);
3885 do_str(s, off, size, a->rn, a->imm * size);
3887 return true;
3891 /* *** SVE Memory - Contiguous Load Group */
3894 /* The memory mode of the dtype. */
3895 static const TCGMemOp dtype_mop[16] = {
3896 MO_UB, MO_UB, MO_UB, MO_UB,
3897 MO_SL, MO_UW, MO_UW, MO_UW,
3898 MO_SW, MO_SW, MO_UL, MO_UL,
3899 MO_SB, MO_SB, MO_SB, MO_Q
3902 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3904 /* The vector element size of dtype. */
3905 static const uint8_t dtype_esz[16] = {
3906 0, 1, 2, 3,
3907 3, 1, 2, 3,
3908 3, 2, 2, 3,
3909 3, 2, 1, 3
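/* Example row decode: dtype 6 is LD1H into 32-bit elements, so
 * dtype_mop[6] = MO_UW (a 16-bit unsigned access) with
 * dtype_esz[6] = 2.  The "square" entries 0, 5, 10, 15 (msz == esz)
 * are the ones msz_dtype produces for same-size loads.
 */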
3912 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3913 gen_helper_gvec_mem *fn)
3915 unsigned vsz = vec_full_reg_size(s);
3916 TCGv_ptr t_pg;
3917 TCGv_i32 desc;
3919 /* For e.g. LD4, there are not enough arguments to pass all 4
3920 * registers as pointers, so encode the regno into the data field.
3921 * For consistency, do this even for LD1. */
3923 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3924 t_pg = tcg_temp_new_ptr();
3926 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3927 fn(cpu_env, t_pg, addr, desc);
3929 tcg_temp_free_ptr(t_pg);
3930 tcg_temp_free_i32(desc);
3933 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3934 TCGv_i64 addr, int dtype, int nreg)
3936 static gen_helper_gvec_mem * const fns[16][4] = {
3937 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3938 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3939 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3940 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3941 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3943 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3944 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3945 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3946 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3947 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3949 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3950 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3951 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3952 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3953 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3955 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3956 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3957 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3958 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3959 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3961 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3963 /* While there are holes in the table, they are not
3964 * accessible via the instruction encoding. */
3966 assert(fn != NULL);
3967 do_mem_zpa(s, zt, pg, addr, fn);
3970 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3972 if (a->rm == 31) {
3973 return false;
3975 if (sve_access_check(s)) {
3976 TCGv_i64 addr = new_tmp_a64(s);
3977 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3978 (a->nreg + 1) << dtype_msz(a->dtype));
3979 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3980 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3982 return true;
3985 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3987 if (sve_access_check(s)) {
3988 int vsz = vec_full_reg_size(s);
3989 int elements = vsz >> dtype_esz[a->dtype];
3990 TCGv_i64 addr = new_tmp_a64(s);
3992 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3993 (a->imm * elements * (a->nreg + 1))
3994 << dtype_msz(a->dtype));
3995 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3997 return true;
4000 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4002 static gen_helper_gvec_mem * const fns[16] = {
4003 gen_helper_sve_ldff1bb_r,
4004 gen_helper_sve_ldff1bhu_r,
4005 gen_helper_sve_ldff1bsu_r,
4006 gen_helper_sve_ldff1bdu_r,
4008 gen_helper_sve_ldff1sds_r,
4009 gen_helper_sve_ldff1hh_r,
4010 gen_helper_sve_ldff1hsu_r,
4011 gen_helper_sve_ldff1hdu_r,
4013 gen_helper_sve_ldff1hds_r,
4014 gen_helper_sve_ldff1hss_r,
4015 gen_helper_sve_ldff1ss_r,
4016 gen_helper_sve_ldff1sdu_r,
4018 gen_helper_sve_ldff1bds_r,
4019 gen_helper_sve_ldff1bss_r,
4020 gen_helper_sve_ldff1bhs_r,
4021 gen_helper_sve_ldff1dd_r,
4024 if (sve_access_check(s)) {
4025 TCGv_i64 addr = new_tmp_a64(s);
4026 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4027 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4028 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4030 return true;
4033 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4035 static gen_helper_gvec_mem * const fns[16] = {
4036 gen_helper_sve_ldnf1bb_r,
4037 gen_helper_sve_ldnf1bhu_r,
4038 gen_helper_sve_ldnf1bsu_r,
4039 gen_helper_sve_ldnf1bdu_r,
4041 gen_helper_sve_ldnf1sds_r,
4042 gen_helper_sve_ldnf1hh_r,
4043 gen_helper_sve_ldnf1hsu_r,
4044 gen_helper_sve_ldnf1hdu_r,
4046 gen_helper_sve_ldnf1hds_r,
4047 gen_helper_sve_ldnf1hss_r,
4048 gen_helper_sve_ldnf1ss_r,
4049 gen_helper_sve_ldnf1sdu_r,
4051 gen_helper_sve_ldnf1bds_r,
4052 gen_helper_sve_ldnf1bss_r,
4053 gen_helper_sve_ldnf1bhs_r,
4054 gen_helper_sve_ldnf1dd_r,
4057 if (sve_access_check(s)) {
4058 int vsz = vec_full_reg_size(s);
4059 int elements = vsz >> dtype_esz[a->dtype];
4060 int off = (a->imm * elements) << dtype_msz(a->dtype);
4061 TCGv_i64 addr = new_tmp_a64(s);
4063 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4064 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4066 return true;
4069 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4071 static gen_helper_gvec_mem * const fns[4] = {
4072 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4073 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4075 unsigned vsz = vec_full_reg_size(s);
4076 TCGv_ptr t_pg;
4077 TCGv_i32 desc;
4079 /* Load the first quadword using the normal predicated load helpers. */
4080 desc = tcg_const_i32(simd_desc(16, 16, zt));
4081 t_pg = tcg_temp_new_ptr();
4083 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4084 fns[msz](cpu_env, t_pg, addr, desc);
4086 tcg_temp_free_ptr(t_pg);
4087 tcg_temp_free_i32(desc);
4089 /* Replicate that first quadword. */
4090 if (vsz > 16) {
4091 unsigned dofs = vec_full_reg_offset(s, zt);
4092 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
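/* E.g. for a hypothetical 512-bit vector, this copies the 16-byte
 * quadword at dofs into dofs+16 .. dofs+63; vece = 4 selects 128-bit
 * units for the dup.
 */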
4096 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4098 if (a->rm == 31) {
4099 return false;
4101 if (sve_access_check(s)) {
4102 int msz = dtype_msz(a->dtype);
4103 TCGv_i64 addr = new_tmp_a64(s);
4104 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4105 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4106 do_ldrq(s, a->rd, a->pg, addr, msz);
4108 return true;
4111 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4113 if (sve_access_check(s)) {
4114 TCGv_i64 addr = new_tmp_a64(s);
4115 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4116 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4118 return true;
4121 /* Load and broadcast element. */
4122 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4124 if (!sve_access_check(s)) {
4125 return true;
4128 unsigned vsz = vec_full_reg_size(s);
4129 unsigned psz = pred_full_reg_size(s);
4130 unsigned esz = dtype_esz[a->dtype];
4131 TCGLabel *over = gen_new_label();
4132 TCGv_i64 temp;
4134 /* If the guarding predicate has no bits set, no load occurs. */
4135 if (psz <= 8) {
4136 /* Reduce the pred_esz_masks value simply to reduce the
4137 * size of the code generated here. */
4139 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4140 temp = tcg_temp_new_i64();
4141 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4142 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4143 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4144 tcg_temp_free_i64(temp);
4145 } else {
4146 TCGv_i32 t32 = tcg_temp_new_i32();
4147 find_last_active(s, t32, esz, a->pg);
4148 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4149 tcg_temp_free_i32(t32);
4152 /* Load the data. */
4153 temp = tcg_temp_new_i64();
4154 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4155 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4156 s->be_data | dtype_mop[a->dtype]);
4158 /* Broadcast to *all* elements. */
4159 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4160 vsz, vsz, temp);
4161 tcg_temp_free_i64(temp);
4163 /* Zero the inactive elements. */
4164 gen_set_label(over);
4165 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4166 return true;
4169 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4170 int msz, int esz, int nreg)
4172 static gen_helper_gvec_mem * const fn_single[4][4] = {
4173 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4174 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4175 { NULL, gen_helper_sve_st1hh_r,
4176 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4177 { NULL, NULL,
4178 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4179 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4181 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4182 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4183 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4184 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4185 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4186 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4187 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4189 gen_helper_gvec_mem *fn;
4191 if (nreg == 0) {
4192 /* ST1 */
4193 fn = fn_single[msz][esz];
4194 } else {
4195 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4196 assert(msz == esz);
4197 fn = fn_multiple[nreg - 1][msz];
4199 assert(fn != NULL);
4200 do_mem_zpa(s, zt, pg, addr, fn);
4203 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4205 if (a->rm == 31 || a->msz > a->esz) {
4206 return false;
4208 if (sve_access_check(s)) {
4209 TCGv_i64 addr = new_tmp_a64(s);
4210 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4211 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4212 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4214 return true;
4217 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4219 if (a->msz > a->esz) {
4220 return false;
4222 if (sve_access_check(s)) {
4223 int vsz = vec_full_reg_size(s);
4224 int elements = vsz >> a->esz;
4225 TCGv_i64 addr = new_tmp_a64(s);
4227 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4228 (a->imm * elements * (a->nreg + 1)) << a->msz);
4229 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4231 return true;
4235 /* *** SVE gather loads / scatter stores */
4238 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4239 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4241 unsigned vsz = vec_full_reg_size(s);
4242 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4243 TCGv_ptr t_zm = tcg_temp_new_ptr();
4244 TCGv_ptr t_pg = tcg_temp_new_ptr();
4245 TCGv_ptr t_zt = tcg_temp_new_ptr();
4247 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4248 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4249 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4250 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4252 tcg_temp_free_ptr(t_zt);
4253 tcg_temp_free_ptr(t_zm);
4254 tcg_temp_free_ptr(t_pg);
4255 tcg_temp_free_i32(desc);
4258 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4260 /* Indexed by [xs][msz]. */
4261 static gen_helper_gvec_mem_scatter * const fn32[2][3] = {
4262 { gen_helper_sve_stbs_zsu,
4263 gen_helper_sve_sths_zsu,
4264 gen_helper_sve_stss_zsu, },
4265 { gen_helper_sve_stbs_zss,
4266 gen_helper_sve_sths_zss,
4267 gen_helper_sve_stss_zss, },
4269 /* Note that we overload xs=2 to indicate 64-bit offset. */
4270 static gen_helper_gvec_mem_scatter * const fn64[3][4] = {
4271 { gen_helper_sve_stbd_zsu,
4272 gen_helper_sve_sthd_zsu,
4273 gen_helper_sve_stsd_zsu,
4274 gen_helper_sve_stdd_zsu, },
4275 { gen_helper_sve_stbd_zss,
4276 gen_helper_sve_sthd_zss,
4277 gen_helper_sve_stsd_zss,
4278 gen_helper_sve_stdd_zss, },
4279 { gen_helper_sve_stbd_zd,
4280 gen_helper_sve_sthd_zd,
4281 gen_helper_sve_stsd_zd,
4282 gen_helper_sve_stdd_zd, },
4284 gen_helper_gvec_mem_scatter *fn;
4286 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4287 return false;
4289 if (!sve_access_check(s)) {
4290 return true;
4292 switch (a->esz) {
4293 case MO_32:
4294 fn = fn32[a->xs][a->msz];
4295 break;
4296 case MO_64:
4297 fn = fn64[a->xs][a->msz];
4298 break;
4299 default:
4300 g_assert_not_reached();
4302 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4303 cpu_reg_sp(s, a->rn), fn);
4304 return true;
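/* Putting the pieces together (a hypothetical example, not generated
 * here): ST1D { Z0.D }, P1, [X2, Z3.D, LSL #3] decodes with
 * esz = msz = 3, xs = 2 (64-bit offsets) and scale = 1, so
 * fn = fn64[2][3] = gen_helper_sve_stdd_zd and the descriptor data
 * is scale * msz = 3, the shift applied to each offset element of Z3
 * before it is added to the base X2.
 */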