target/arm: Implement SVE Predicate Count Group
[qemu.git] / target / arm / translate-sve.c
blob 6b0b8c55d00d73e92ee83b46a3ea0729f0212fd9
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
46 * Helpers for extracting complex instruction fields.
49 /* See e.g. ASR (immediate, predicated).
50 * Returns -1 for unallocated encoding; diagnose later.
52 static int tszimm_esz(int x)
54 x >>= 3; /* discard imm3 */
55 return 31 - clz32(x);
58 static int tszimm_shr(int x)
60 return (16 << tszimm_esz(x)) - x;
63 /* See e.g. LSL (immediate, predicated). */
64 static int tszimm_shl(int x)
66 return x - (8 << tszimm_esz(x));
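/* Worked example of the tsz:imm3 field decoded above: for x = 0b0001101
 * (13), x >> 3 == 1, so esz == 0 (byte elements); the right-shift amount
 * is then (16 << 0) - 13 == 3 and the left-shift amount 13 - (8 << 0) == 5.
 * A zero tsz field gives clz32(0) == 32 and hence esz == -1, which the
 * individual translators reject as unallocated.
 */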
69 static inline int plus1(int x)
71 return x + 1;
74 /* The SH bit is in bit 8. Extract the low 8 and shift. */
75 static inline int expand_imm_sh8s(int x)
77 return (int8_t)x << (x & 0x100 ? 8 : 0);
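/* E.g. x == 0x17f expands to 0x7f00 (127 << 8), while x == 0x0ff
 * expands to -1, since the low byte is sign-extended before the
 * optional shift.
 */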
81 * Include the generated decoder.
84 #include "decode-sve.inc.c"
87 * Implement all of the translator functions referenced by the decoder.
90 /* Return the offset into CPUARMState of the predicate vector register Pn.
91 * Note for this purpose, FFR is P16.
93 static inline int pred_full_reg_offset(DisasContext *s, int regno)
95 return offsetof(CPUARMState, vfp.pregs[regno]);
98 /* Return the byte size of the whole predicate register, VL / 64. */
99 static inline int pred_full_reg_size(DisasContext *s)
101 return s->sve_len >> 3;
104 /* Round up the size of a register to a size allowed by
105 * the tcg vector infrastructure. Any operation which uses this
106 * size may assume that the bits above pred_full_reg_size are zero,
107 * and must leave them the same way.
109 * Note that this is not needed for the vector registers as they
110 * are always properly sized for tcg vectors.
112 static int size_for_gvec(int size)
114 if (size <= 8) {
115 return 8;
116 } else {
117 return QEMU_ALIGN_UP(size, 16);
121 static int pred_gvec_reg_size(DisasContext *s)
123 return size_for_gvec(pred_full_reg_size(s));
126 /* Invoke a vector expander on two Zregs. */
127 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
128 int esz, int rd, int rn)
130 if (sve_access_check(s)) {
131 unsigned vsz = vec_full_reg_size(s);
132 gvec_fn(esz, vec_full_reg_offset(s, rd),
133 vec_full_reg_offset(s, rn), vsz, vsz);
135 return true;
138 /* Invoke a vector expander on three Zregs. */
139 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
140 int esz, int rd, int rn, int rm)
142 if (sve_access_check(s)) {
143 unsigned vsz = vec_full_reg_size(s);
144 gvec_fn(esz, vec_full_reg_offset(s, rd),
145 vec_full_reg_offset(s, rn),
146 vec_full_reg_offset(s, rm), vsz, vsz);
148 return true;
151 /* Invoke a vector move on two Zregs. */
152 static bool do_mov_z(DisasContext *s, int rd, int rn)
154 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
157 /* Initialize a Zreg with replications of a 64-bit immediate. */
158 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
160 unsigned vsz = vec_full_reg_size(s);
161 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
164 /* Invoke a vector expander on two Pregs. */
165 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
166 int esz, int rd, int rn)
168 if (sve_access_check(s)) {
169 unsigned psz = pred_gvec_reg_size(s);
170 gvec_fn(esz, pred_full_reg_offset(s, rd),
171 pred_full_reg_offset(s, rn), psz, psz);
173 return true;
176 /* Invoke a vector expander on three Pregs. */
177 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
178 int esz, int rd, int rn, int rm)
180 if (sve_access_check(s)) {
181 unsigned psz = pred_gvec_reg_size(s);
182 gvec_fn(esz, pred_full_reg_offset(s, rd),
183 pred_full_reg_offset(s, rn),
184 pred_full_reg_offset(s, rm), psz, psz);
186 return true;
189 /* Invoke a vector operation on four Pregs. */
190 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
191 int rd, int rn, int rm, int rg)
193 if (sve_access_check(s)) {
194 unsigned psz = pred_gvec_reg_size(s);
195 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
196 pred_full_reg_offset(s, rn),
197 pred_full_reg_offset(s, rm),
198 pred_full_reg_offset(s, rg),
199 psz, psz, gvec_op);
201 return true;
204 /* Invoke a vector move on two Pregs. */
205 static bool do_mov_p(DisasContext *s, int rd, int rn)
207 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
210 /* Set the cpu flags as per a return from an SVE helper. */
211 static void do_pred_flags(TCGv_i32 t)
213 tcg_gen_mov_i32(cpu_NF, t);
214 tcg_gen_andi_i32(cpu_ZF, t, 2);
215 tcg_gen_andi_i32(cpu_CF, t, 1);
216 tcg_gen_movi_i32(cpu_VF, 0);
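/* The predtest helpers are expected to return a word with N in bit 31,
 * "some active element was true" in bit 1 and C in bit 0.  Given QEMU's
 * flag conventions (cpu_NF carries N in its sign bit, cpu_ZF == 0 means
 * Z is set), the moves above map that word directly onto NZCV, with V
 * always clear.
 */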
219 /* Subroutines computing the ARM PredTest pseudofunction. */
220 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
222 TCGv_i32 t = tcg_temp_new_i32();
224 gen_helper_sve_predtest1(t, d, g);
225 do_pred_flags(t);
226 tcg_temp_free_i32(t);
229 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
231 TCGv_ptr dptr = tcg_temp_new_ptr();
232 TCGv_ptr gptr = tcg_temp_new_ptr();
233 TCGv_i32 t;
235 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
236 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
237 t = tcg_const_i32(words);
239 gen_helper_sve_predtest(t, dptr, gptr, t);
240 tcg_temp_free_ptr(dptr);
241 tcg_temp_free_ptr(gptr);
243 do_pred_flags(t);
244 tcg_temp_free_i32(t);
247 /* For each element size, the bits within a predicate word that are active. */
248 const uint64_t pred_esz_masks[4] = {
249 0xffffffffffffffffull, 0x5555555555555555ull,
250 0x1111111111111111ull, 0x0101010101010101ull
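/* One predicate bit corresponds to one byte of the vector, so for MO_8
 * every bit of the word is significant, for MO_16 every second bit,
 * for MO_32 every fourth and for MO_64 every eighth.
 */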
254 *** SVE Logical - Unpredicated Group
257 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
259 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
262 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
264 if (a->rn == a->rm) { /* MOV */
265 return do_mov_z(s, a->rd, a->rn);
266 } else {
267 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
271 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
273 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
276 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
278 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
282 *** SVE Integer Arithmetic - Unpredicated Group
285 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
287 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
290 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
292 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
295 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
297 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
300 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
302 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
305 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
307 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
310 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
312 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
316 *** SVE Integer Arithmetic - Binary Predicated Group
319 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
321 unsigned vsz = vec_full_reg_size(s);
322 if (fn == NULL) {
323 return false;
325 if (sve_access_check(s)) {
326 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
327 vec_full_reg_offset(s, a->rn),
328 vec_full_reg_offset(s, a->rm),
329 pred_full_reg_offset(s, a->pg),
330 vsz, vsz, 0, fn);
332 return true;
335 #define DO_ZPZZ(NAME, name) \
336 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
337 uint32_t insn) \
339 static gen_helper_gvec_4 * const fns[4] = { \
340 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
341 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
342 }; \
343 return do_zpzz_ool(s, a, fns[a->esz]); \
346 DO_ZPZZ(AND, and)
347 DO_ZPZZ(EOR, eor)
348 DO_ZPZZ(ORR, orr)
349 DO_ZPZZ(BIC, bic)
351 DO_ZPZZ(ADD, add)
352 DO_ZPZZ(SUB, sub)
354 DO_ZPZZ(SMAX, smax)
355 DO_ZPZZ(UMAX, umax)
356 DO_ZPZZ(SMIN, smin)
357 DO_ZPZZ(UMIN, umin)
358 DO_ZPZZ(SABD, sabd)
359 DO_ZPZZ(UABD, uabd)
361 DO_ZPZZ(MUL, mul)
362 DO_ZPZZ(SMULH, smulh)
363 DO_ZPZZ(UMULH, umulh)
365 DO_ZPZZ(ASR, asr)
366 DO_ZPZZ(LSR, lsr)
367 DO_ZPZZ(LSL, lsl)
369 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
371 static gen_helper_gvec_4 * const fns[4] = {
372 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
374 return do_zpzz_ool(s, a, fns[a->esz]);
377 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
379 static gen_helper_gvec_4 * const fns[4] = {
380 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
382 return do_zpzz_ool(s, a, fns[a->esz]);
385 DO_ZPZZ(SEL, sel)
387 #undef DO_ZPZZ
390 *** SVE Integer Arithmetic - Unary Predicated Group
393 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
395 if (fn == NULL) {
396 return false;
398 if (sve_access_check(s)) {
399 unsigned vsz = vec_full_reg_size(s);
400 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
401 vec_full_reg_offset(s, a->rn),
402 pred_full_reg_offset(s, a->pg),
403 vsz, vsz, 0, fn);
405 return true;
408 #define DO_ZPZ(NAME, name) \
409 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
411 static gen_helper_gvec_3 * const fns[4] = { \
412 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
413 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
414 }; \
415 return do_zpz_ool(s, a, fns[a->esz]); \
418 DO_ZPZ(CLS, cls)
419 DO_ZPZ(CLZ, clz)
420 DO_ZPZ(CNT_zpz, cnt_zpz)
421 DO_ZPZ(CNOT, cnot)
422 DO_ZPZ(NOT_zpz, not_zpz)
423 DO_ZPZ(ABS, abs)
424 DO_ZPZ(NEG, neg)
426 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
428 static gen_helper_gvec_3 * const fns[4] = {
429 NULL,
430 gen_helper_sve_fabs_h,
431 gen_helper_sve_fabs_s,
432 gen_helper_sve_fabs_d
434 return do_zpz_ool(s, a, fns[a->esz]);
437 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
439 static gen_helper_gvec_3 * const fns[4] = {
440 NULL,
441 gen_helper_sve_fneg_h,
442 gen_helper_sve_fneg_s,
443 gen_helper_sve_fneg_d
445 return do_zpz_ool(s, a, fns[a->esz]);
448 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
450 static gen_helper_gvec_3 * const fns[4] = {
451 NULL,
452 gen_helper_sve_sxtb_h,
453 gen_helper_sve_sxtb_s,
454 gen_helper_sve_sxtb_d
456 return do_zpz_ool(s, a, fns[a->esz]);
459 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
461 static gen_helper_gvec_3 * const fns[4] = {
462 NULL,
463 gen_helper_sve_uxtb_h,
464 gen_helper_sve_uxtb_s,
465 gen_helper_sve_uxtb_d
467 return do_zpz_ool(s, a, fns[a->esz]);
470 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
472 static gen_helper_gvec_3 * const fns[4] = {
473 NULL, NULL,
474 gen_helper_sve_sxth_s,
475 gen_helper_sve_sxth_d
477 return do_zpz_ool(s, a, fns[a->esz]);
480 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
482 static gen_helper_gvec_3 * const fns[4] = {
483 NULL, NULL,
484 gen_helper_sve_uxth_s,
485 gen_helper_sve_uxth_d
487 return do_zpz_ool(s, a, fns[a->esz]);
490 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
492 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
495 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
497 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
500 #undef DO_ZPZ
503 *** SVE Integer Reduction Group
506 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
507 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
508 gen_helper_gvec_reduc *fn)
510 unsigned vsz = vec_full_reg_size(s);
511 TCGv_ptr t_zn, t_pg;
512 TCGv_i32 desc;
513 TCGv_i64 temp;
515 if (fn == NULL) {
516 return false;
518 if (!sve_access_check(s)) {
519 return true;
522 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
523 temp = tcg_temp_new_i64();
524 t_zn = tcg_temp_new_ptr();
525 t_pg = tcg_temp_new_ptr();
527 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
528 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
529 fn(temp, t_zn, t_pg, desc);
530 tcg_temp_free_ptr(t_zn);
531 tcg_temp_free_ptr(t_pg);
532 tcg_temp_free_i32(desc);
534 write_fp_dreg(s, a->rd, temp);
535 tcg_temp_free_i64(temp);
536 return true;
539 #define DO_VPZ(NAME, name) \
540 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
542 static gen_helper_gvec_reduc * const fns[4] = { \
543 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
544 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
545 }; \
546 return do_vpz_ool(s, a, fns[a->esz]); \
549 DO_VPZ(ORV, orv)
550 DO_VPZ(ANDV, andv)
551 DO_VPZ(EORV, eorv)
553 DO_VPZ(UADDV, uaddv)
554 DO_VPZ(SMAXV, smaxv)
555 DO_VPZ(UMAXV, umaxv)
556 DO_VPZ(SMINV, sminv)
557 DO_VPZ(UMINV, uminv)
559 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
561 static gen_helper_gvec_reduc * const fns[4] = {
562 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
563 gen_helper_sve_saddv_s, NULL
565 return do_vpz_ool(s, a, fns[a->esz]);
568 #undef DO_VPZ
571 *** SVE Shift by Immediate - Predicated Group
574 /* Store zero into every active element of Zd. We will use this for two-
575 * and three-operand predicated instructions for which logic dictates a
576 * zero result.
578 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
580 static gen_helper_gvec_2 * const fns[4] = {
581 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
582 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
584 if (sve_access_check(s)) {
585 unsigned vsz = vec_full_reg_size(s);
586 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
587 pred_full_reg_offset(s, pg),
588 vsz, vsz, 0, fns[esz]);
590 return true;
593 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
594 gen_helper_gvec_3 *fn)
596 if (sve_access_check(s)) {
597 unsigned vsz = vec_full_reg_size(s);
598 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
599 vec_full_reg_offset(s, a->rn),
600 pred_full_reg_offset(s, a->pg),
601 vsz, vsz, a->imm, fn);
603 return true;
606 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
608 static gen_helper_gvec_3 * const fns[4] = {
609 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
610 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
612 if (a->esz < 0) {
613 /* Invalid tsz encoding -- see tszimm_esz. */
614 return false;
616 /* Shift by element size is architecturally valid. For
617 arithmetic right-shift, it's the same as by one less. */
618 a->imm = MIN(a->imm, (8 << a->esz) - 1);
619 return do_zpzi_ool(s, a, fns[a->esz]);
622 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
624 static gen_helper_gvec_3 * const fns[4] = {
625 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
626 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
628 if (a->esz < 0) {
629 return false;
631 /* Shift by element size is architecturally valid.
632 For logical shifts, it is a zeroing operation. */
633 if (a->imm >= (8 << a->esz)) {
634 return do_clr_zp(s, a->rd, a->pg, a->esz);
635 } else {
636 return do_zpzi_ool(s, a, fns[a->esz]);
640 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
642 static gen_helper_gvec_3 * const fns[4] = {
643 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
644 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
646 if (a->esz < 0) {
647 return false;
649 /* Shift by element size is architecturally valid.
650 For logical shifts, it is a zeroing operation. */
651 if (a->imm >= (8 << a->esz)) {
652 return do_clr_zp(s, a->rd, a->pg, a->esz);
653 } else {
654 return do_zpzi_ool(s, a, fns[a->esz]);
658 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
660 static gen_helper_gvec_3 * const fns[4] = {
661 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
662 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
664 if (a->esz < 0) {
665 return false;
667 /* Shift by element size is architecturally valid. For arithmetic
668 right shift for division, it is a zeroing operation. */
669 if (a->imm >= (8 << a->esz)) {
670 return do_clr_zp(s, a->rd, a->pg, a->esz);
671 } else {
672 return do_zpzi_ool(s, a, fns[a->esz]);
677 *** SVE Bitwise Shift - Predicated Group
680 #define DO_ZPZW(NAME, name) \
681 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
682 uint32_t insn) \
684 static gen_helper_gvec_4 * const fns[3] = { \
685 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
686 gen_helper_sve_##name##_zpzw_s, \
687 }; \
688 if (a->esz < 0 || a->esz >= 3) { \
689 return false; \
691 return do_zpzz_ool(s, a, fns[a->esz]); \
694 DO_ZPZW(ASR, asr)
695 DO_ZPZW(LSR, lsr)
696 DO_ZPZW(LSL, lsl)
698 #undef DO_ZPZW
701 *** SVE Bitwise Shift - Unpredicated Group
704 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
705 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
706 int64_t, uint32_t, uint32_t))
708 if (a->esz < 0) {
709 /* Invalid tsz encoding -- see tszimm_esz. */
710 return false;
712 if (sve_access_check(s)) {
713 unsigned vsz = vec_full_reg_size(s);
714 /* Shift by element size is architecturally valid. For
715 arithmetic right-shift, it's the same as by one less.
716 Otherwise it is a zeroing operation. */
717 if (a->imm >= 8 << a->esz) {
718 if (asr) {
719 a->imm = (8 << a->esz) - 1;
720 } else {
721 do_dupi_z(s, a->rd, 0);
722 return true;
725 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
726 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
728 return true;
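/* E.g. an unpredicated LSR #8 on byte elements takes the imm >= esize
 * path above and simply zeroes the destination, while ASR #8 on byte
 * elements is reduced to ASR #7, which produces the same result for
 * every input.
 */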
731 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
733 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
736 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
738 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
741 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
743 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
746 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
748 if (fn == NULL) {
749 return false;
751 if (sve_access_check(s)) {
752 unsigned vsz = vec_full_reg_size(s);
753 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
754 vec_full_reg_offset(s, a->rn),
755 vec_full_reg_offset(s, a->rm),
756 vsz, vsz, 0, fn);
758 return true;
761 #define DO_ZZW(NAME, name) \
762 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
763 uint32_t insn) \
765 static gen_helper_gvec_3 * const fns[4] = { \
766 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
767 gen_helper_sve_##name##_zzw_s, NULL \
768 }; \
769 return do_zzw_ool(s, a, fns[a->esz]); \
772 DO_ZZW(ASR, asr)
773 DO_ZZW(LSR, lsr)
774 DO_ZZW(LSL, lsl)
776 #undef DO_ZZW
779 *** SVE Integer Multiply-Add Group
782 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
783 gen_helper_gvec_5 *fn)
785 if (sve_access_check(s)) {
786 unsigned vsz = vec_full_reg_size(s);
787 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
788 vec_full_reg_offset(s, a->ra),
789 vec_full_reg_offset(s, a->rn),
790 vec_full_reg_offset(s, a->rm),
791 pred_full_reg_offset(s, a->pg),
792 vsz, vsz, 0, fn);
794 return true;
797 #define DO_ZPZZZ(NAME, name) \
798 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
800 static gen_helper_gvec_5 * const fns[4] = { \
801 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
802 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
803 }; \
804 return do_zpzzz_ool(s, a, fns[a->esz]); \
807 DO_ZPZZZ(MLA, mla)
808 DO_ZPZZZ(MLS, mls)
810 #undef DO_ZPZZZ
813 *** SVE Index Generation Group
816 static void do_index(DisasContext *s, int esz, int rd,
817 TCGv_i64 start, TCGv_i64 incr)
819 unsigned vsz = vec_full_reg_size(s);
820 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
821 TCGv_ptr t_zd = tcg_temp_new_ptr();
823 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
824 if (esz == 3) {
825 gen_helper_sve_index_d(t_zd, start, incr, desc);
826 } else {
827 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
828 static index_fn * const fns[3] = {
829 gen_helper_sve_index_b,
830 gen_helper_sve_index_h,
831 gen_helper_sve_index_s,
833 TCGv_i32 s32 = tcg_temp_new_i32();
834 TCGv_i32 i32 = tcg_temp_new_i32();
836 tcg_gen_extrl_i64_i32(s32, start);
837 tcg_gen_extrl_i64_i32(i32, incr);
838 fns[esz](t_zd, s32, i32, desc);
840 tcg_temp_free_i32(s32);
841 tcg_temp_free_i32(i32);
843 tcg_temp_free_ptr(t_zd);
844 tcg_temp_free_i32(desc);
847 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
849 if (sve_access_check(s)) {
850 TCGv_i64 start = tcg_const_i64(a->imm1);
851 TCGv_i64 incr = tcg_const_i64(a->imm2);
852 do_index(s, a->esz, a->rd, start, incr);
853 tcg_temp_free_i64(start);
854 tcg_temp_free_i64(incr);
856 return true;
859 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
861 if (sve_access_check(s)) {
862 TCGv_i64 start = tcg_const_i64(a->imm);
863 TCGv_i64 incr = cpu_reg(s, a->rm);
864 do_index(s, a->esz, a->rd, start, incr);
865 tcg_temp_free_i64(start);
867 return true;
870 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
872 if (sve_access_check(s)) {
873 TCGv_i64 start = cpu_reg(s, a->rn);
874 TCGv_i64 incr = tcg_const_i64(a->imm);
875 do_index(s, a->esz, a->rd, start, incr);
876 tcg_temp_free_i64(incr);
878 return true;
881 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
883 if (sve_access_check(s)) {
884 TCGv_i64 start = cpu_reg(s, a->rn);
885 TCGv_i64 incr = cpu_reg(s, a->rm);
886 do_index(s, a->esz, a->rd, start, incr);
888 return true;
892 *** SVE Stack Allocation Group
895 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
897 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
898 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
899 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
900 return true;
903 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
905 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
906 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
907 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
908 return true;
911 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
913 TCGv_i64 reg = cpu_reg(s, a->rd);
914 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
915 return true;
919 *** SVE Compute Vector Address Group
922 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
924 if (sve_access_check(s)) {
925 unsigned vsz = vec_full_reg_size(s);
926 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
927 vec_full_reg_offset(s, a->rn),
928 vec_full_reg_offset(s, a->rm),
929 vsz, vsz, a->imm, fn);
931 return true;
934 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
936 return do_adr(s, a, gen_helper_sve_adr_p32);
939 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
941 return do_adr(s, a, gen_helper_sve_adr_p64);
944 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
946 return do_adr(s, a, gen_helper_sve_adr_s32);
949 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
951 return do_adr(s, a, gen_helper_sve_adr_u32);
955 *** SVE Integer Misc - Unpredicated Group
958 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
960 static gen_helper_gvec_2 * const fns[4] = {
961 NULL,
962 gen_helper_sve_fexpa_h,
963 gen_helper_sve_fexpa_s,
964 gen_helper_sve_fexpa_d,
966 if (a->esz == 0) {
967 return false;
969 if (sve_access_check(s)) {
970 unsigned vsz = vec_full_reg_size(s);
971 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
972 vec_full_reg_offset(s, a->rn),
973 vsz, vsz, 0, fns[a->esz]);
975 return true;
978 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
980 static gen_helper_gvec_3 * const fns[4] = {
981 NULL,
982 gen_helper_sve_ftssel_h,
983 gen_helper_sve_ftssel_s,
984 gen_helper_sve_ftssel_d,
986 if (a->esz == 0) {
987 return false;
989 if (sve_access_check(s)) {
990 unsigned vsz = vec_full_reg_size(s);
991 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
992 vec_full_reg_offset(s, a->rn),
993 vec_full_reg_offset(s, a->rm),
994 vsz, vsz, 0, fns[a->esz]);
996 return true;
1000 *** SVE Predicate Logical Operations Group
1003 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1004 const GVecGen4 *gvec_op)
1006 if (!sve_access_check(s)) {
1007 return true;
1010 unsigned psz = pred_gvec_reg_size(s);
1011 int dofs = pred_full_reg_offset(s, a->rd);
1012 int nofs = pred_full_reg_offset(s, a->rn);
1013 int mofs = pred_full_reg_offset(s, a->rm);
1014 int gofs = pred_full_reg_offset(s, a->pg);
1016 if (psz == 8) {
1017 /* Do the operation and the flags generation in temps. */
1018 TCGv_i64 pd = tcg_temp_new_i64();
1019 TCGv_i64 pn = tcg_temp_new_i64();
1020 TCGv_i64 pm = tcg_temp_new_i64();
1021 TCGv_i64 pg = tcg_temp_new_i64();
1023 tcg_gen_ld_i64(pn, cpu_env, nofs);
1024 tcg_gen_ld_i64(pm, cpu_env, mofs);
1025 tcg_gen_ld_i64(pg, cpu_env, gofs);
1027 gvec_op->fni8(pd, pn, pm, pg);
1028 tcg_gen_st_i64(pd, cpu_env, dofs);
1030 do_predtest1(pd, pg);
1032 tcg_temp_free_i64(pd);
1033 tcg_temp_free_i64(pn);
1034 tcg_temp_free_i64(pm);
1035 tcg_temp_free_i64(pg);
1036 } else {
1037 /* The operation and flags generation is large. The computation
1038 * of the flags depends on the original contents of the guarding
1039 * predicate. If the destination overwrites the guarding predicate,
1040 * then the easiest way to get this right is to save a copy.
1042 int tofs = gofs;
1043 if (a->rd == a->pg) {
1044 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1045 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 do_predtest(s, dofs, tofs, psz / 8);
1051 return true;
1054 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1056 tcg_gen_and_i64(pd, pn, pm);
1057 tcg_gen_and_i64(pd, pd, pg);
1060 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1061 TCGv_vec pm, TCGv_vec pg)
1063 tcg_gen_and_vec(vece, pd, pn, pm);
1064 tcg_gen_and_vec(vece, pd, pd, pg);
1067 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1069 static const GVecGen4 op = {
1070 .fni8 = gen_and_pg_i64,
1071 .fniv = gen_and_pg_vec,
1072 .fno = gen_helper_sve_and_pppp,
1073 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1075 if (a->s) {
1076 return do_pppp_flags(s, a, &op);
1077 } else if (a->rn == a->rm) {
1078 if (a->pg == a->rn) {
1079 return do_mov_p(s, a->rd, a->rn);
1080 } else {
1081 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1083 } else if (a->pg == a->rn || a->pg == a->rm) {
1084 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1085 } else {
1086 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1090 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1092 tcg_gen_andc_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1096 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1099 tcg_gen_andc_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1103 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1105 static const GVecGen4 op = {
1106 .fni8 = gen_bic_pg_i64,
1107 .fniv = gen_bic_pg_vec,
1108 .fno = gen_helper_sve_bic_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1111 if (a->s) {
1112 return do_pppp_flags(s, a, &op);
1113 } else if (a->pg == a->rn) {
1114 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1115 } else {
1116 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1120 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1122 tcg_gen_xor_i64(pd, pn, pm);
1123 tcg_gen_and_i64(pd, pd, pg);
1126 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1127 TCGv_vec pm, TCGv_vec pg)
1129 tcg_gen_xor_vec(vece, pd, pn, pm);
1130 tcg_gen_and_vec(vece, pd, pd, pg);
1133 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1135 static const GVecGen4 op = {
1136 .fni8 = gen_eor_pg_i64,
1137 .fniv = gen_eor_pg_vec,
1138 .fno = gen_helper_sve_eor_pppp,
1139 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1141 if (a->s) {
1142 return do_pppp_flags(s, a, &op);
1143 } else {
1144 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1148 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1150 tcg_gen_and_i64(pn, pn, pg);
1151 tcg_gen_andc_i64(pm, pm, pg);
1152 tcg_gen_or_i64(pd, pn, pm);
1155 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1156 TCGv_vec pm, TCGv_vec pg)
1158 tcg_gen_and_vec(vece, pn, pn, pg);
1159 tcg_gen_andc_vec(vece, pm, pm, pg);
1160 tcg_gen_or_vec(vece, pd, pn, pm);
1163 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1165 static const GVecGen4 op = {
1166 .fni8 = gen_sel_pg_i64,
1167 .fniv = gen_sel_pg_vec,
1168 .fno = gen_helper_sve_sel_pppp,
1169 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1171 if (a->s) {
1172 return false;
1173 } else {
1174 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1178 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1180 tcg_gen_or_i64(pd, pn, pm);
1181 tcg_gen_and_i64(pd, pd, pg);
1184 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1185 TCGv_vec pm, TCGv_vec pg)
1187 tcg_gen_or_vec(vece, pd, pn, pm);
1188 tcg_gen_and_vec(vece, pd, pd, pg);
1191 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1193 static const GVecGen4 op = {
1194 .fni8 = gen_orr_pg_i64,
1195 .fniv = gen_orr_pg_vec,
1196 .fno = gen_helper_sve_orr_pppp,
1197 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1199 if (a->s) {
1200 return do_pppp_flags(s, a, &op);
1201 } else if (a->pg == a->rn && a->rn == a->rm) {
1202 return do_mov_p(s, a->rd, a->rn);
1203 } else {
1204 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1208 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1210 tcg_gen_orc_i64(pd, pn, pm);
1211 tcg_gen_and_i64(pd, pd, pg);
1214 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1215 TCGv_vec pm, TCGv_vec pg)
1217 tcg_gen_orc_vec(vece, pd, pn, pm);
1218 tcg_gen_and_vec(vece, pd, pd, pg);
1221 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1223 static const GVecGen4 op = {
1224 .fni8 = gen_orn_pg_i64,
1225 .fniv = gen_orn_pg_vec,
1226 .fno = gen_helper_sve_orn_pppp,
1227 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1229 if (a->s) {
1230 return do_pppp_flags(s, a, &op);
1231 } else {
1232 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1236 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1238 tcg_gen_or_i64(pd, pn, pm);
1239 tcg_gen_andc_i64(pd, pg, pd);
1242 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1243 TCGv_vec pm, TCGv_vec pg)
1245 tcg_gen_or_vec(vece, pd, pn, pm);
1246 tcg_gen_andc_vec(vece, pd, pg, pd);
1249 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1251 static const GVecGen4 op = {
1252 .fni8 = gen_nor_pg_i64,
1253 .fniv = gen_nor_pg_vec,
1254 .fno = gen_helper_sve_nor_pppp,
1255 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1257 if (a->s) {
1258 return do_pppp_flags(s, a, &op);
1259 } else {
1260 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1264 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1266 tcg_gen_and_i64(pd, pn, pm);
1267 tcg_gen_andc_i64(pd, pg, pd);
1270 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271 TCGv_vec pm, TCGv_vec pg)
1273 tcg_gen_and_vec(vece, pd, pn, pm);
1274 tcg_gen_andc_vec(vece, pd, pg, pd);
1277 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1279 static const GVecGen4 op = {
1280 .fni8 = gen_nand_pg_i64,
1281 .fniv = gen_nand_pg_vec,
1282 .fno = gen_helper_sve_nand_pppp,
1283 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1285 if (a->s) {
1286 return do_pppp_flags(s, a, &op);
1287 } else {
1288 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1293 *** SVE Predicate Misc Group
1296 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1298 if (sve_access_check(s)) {
1299 int nofs = pred_full_reg_offset(s, a->rn);
1300 int gofs = pred_full_reg_offset(s, a->pg);
1301 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1303 if (words == 1) {
1304 TCGv_i64 pn = tcg_temp_new_i64();
1305 TCGv_i64 pg = tcg_temp_new_i64();
1307 tcg_gen_ld_i64(pn, cpu_env, nofs);
1308 tcg_gen_ld_i64(pg, cpu_env, gofs);
1309 do_predtest1(pn, pg);
1311 tcg_temp_free_i64(pn);
1312 tcg_temp_free_i64(pg);
1313 } else {
1314 do_predtest(s, nofs, gofs, words);
1317 return true;
1320 /* See the ARM pseudocode DecodePredCount. */
1321 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1323 unsigned elements = fullsz >> esz;
1324 unsigned bound;
1326 switch (pattern) {
1327 case 0x0: /* POW2 */
1328 return pow2floor(elements);
1329 case 0x1: /* VL1 */
1330 case 0x2: /* VL2 */
1331 case 0x3: /* VL3 */
1332 case 0x4: /* VL4 */
1333 case 0x5: /* VL5 */
1334 case 0x6: /* VL6 */
1335 case 0x7: /* VL7 */
1336 case 0x8: /* VL8 */
1337 bound = pattern;
1338 break;
1339 case 0x9: /* VL16 */
1340 case 0xa: /* VL32 */
1341 case 0xb: /* VL64 */
1342 case 0xc: /* VL128 */
1343 case 0xd: /* VL256 */
1344 bound = 16 << (pattern - 9);
1345 break;
1346 case 0x1d: /* MUL4 */
1347 return elements - elements % 4;
1348 case 0x1e: /* MUL3 */
1349 return elements - elements % 3;
1350 case 0x1f: /* ALL */
1351 return elements;
1352 default: /* #uimm5 */
1353 return 0;
1355 return elements >= bound ? bound : 0;
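/* E.g. with a 256-bit vector (fullsz == 32) and esz == MO_16 there are
 * 16 elements: POW2 and ALL both give 16, VL7 gives 7, MUL3 gives 15,
 * and VL32 gives 0 because the bound exceeds the element count.
 */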
1358 /* This handles all of the predicate initialization instructions,
1359 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1360 * so that decode_pred_count returns 0. For SETFFR, we will have
1361 * set RD == 16 == FFR.
1363 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1365 if (!sve_access_check(s)) {
1366 return true;
1369 unsigned fullsz = vec_full_reg_size(s);
1370 unsigned ofs = pred_full_reg_offset(s, rd);
1371 unsigned numelem, setsz, i;
1372 uint64_t word, lastword;
1373 TCGv_i64 t;
1375 numelem = decode_pred_count(fullsz, pat, esz);
1377 /* Determine what we must store into each bit, and how many. */
1378 if (numelem == 0) {
1379 lastword = word = 0;
1380 setsz = fullsz;
1381 } else {
1382 setsz = numelem << esz;
1383 lastword = word = pred_esz_masks[esz];
1384 if (setsz % 64) {
1385 lastword &= ~(-1ull << (setsz % 64));
1389 t = tcg_temp_new_i64();
1390 if (fullsz <= 64) {
1391 tcg_gen_movi_i64(t, lastword);
1392 tcg_gen_st_i64(t, cpu_env, ofs);
1393 goto done;
1396 if (word == lastword) {
1397 unsigned maxsz = size_for_gvec(fullsz / 8);
1398 unsigned oprsz = size_for_gvec(setsz / 8);
1400 if (oprsz * 8 == setsz) {
1401 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1402 goto done;
1404 if (oprsz * 8 == setsz + 8) {
1405 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1406 tcg_gen_movi_i64(t, 0);
1407 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1408 goto done;
1412 setsz /= 8;
1413 fullsz /= 8;
1415 tcg_gen_movi_i64(t, word);
1416 for (i = 0; i < setsz; i += 8) {
1417 tcg_gen_st_i64(t, cpu_env, ofs + i);
1419 if (lastword != word) {
1420 tcg_gen_movi_i64(t, lastword);
1421 tcg_gen_st_i64(t, cpu_env, ofs + i);
1422 i += 8;
1424 if (i < fullsz) {
1425 tcg_gen_movi_i64(t, 0);
1426 for (; i < fullsz; i += 8) {
1427 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 done:
1432 tcg_temp_free_i64(t);
1434 /* PTRUES */
1435 if (setflag) {
1436 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1437 tcg_gen_movi_i32(cpu_CF, word == 0);
1438 tcg_gen_movi_i32(cpu_VF, 0);
1439 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1441 return true;
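/* The flags written for PTRUES above match PredTest on the value just
 * stored: any non-zero word sets N and clears Z and C, while the
 * all-false case clears N and sets Z and C, with V always clear.
 */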
1444 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1446 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1449 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1451 /* Note pat == 31 is #all, to set all elements. */
1452 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1455 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1457 /* Note pat == 32 is #unimp, to set no elements. */
1458 return do_predset(s, 0, a->rd, 32, false);
1461 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1463 /* The path through do_pppp_flags is complicated enough to want to avoid
1464 * duplication. Frob the arguments into the form of a predicated AND.
1466 arg_rprr_s alt_a = {
1467 .rd = a->rd, .pg = a->pg, .s = a->s,
1468 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1470 return trans_AND_pppp(s, &alt_a, insn);
1473 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1475 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1478 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1480 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1483 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1484 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1485 TCGv_ptr, TCGv_i32))
1487 if (!sve_access_check(s)) {
1488 return true;
1491 TCGv_ptr t_pd = tcg_temp_new_ptr();
1492 TCGv_ptr t_pg = tcg_temp_new_ptr();
1493 TCGv_i32 t;
1494 unsigned desc;
1496 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1497 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1499 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1500 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1501 t = tcg_const_i32(desc);
1503 gen_fn(t, t_pd, t_pg, t);
1504 tcg_temp_free_ptr(t_pd);
1505 tcg_temp_free_ptr(t_pg);
1507 do_pred_flags(t);
1508 tcg_temp_free_i32(t);
1509 return true;
1512 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1514 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1517 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1519 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523 *** SVE Element Count Group
1526 /* Perform an inline saturating addition of a 32-bit value within
1527 * a 64-bit register. The second operand is known to be positive,
1528 * which halves the comparisons we must perform to bound the result.
1530 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1532 int64_t ibound;
1533 TCGv_i64 bound;
1534 TCGCond cond;
1536 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1537 if (u) {
1538 tcg_gen_ext32u_i64(reg, reg);
1539 } else {
1540 tcg_gen_ext32s_i64(reg, reg);
1542 if (d) {
1543 tcg_gen_sub_i64(reg, reg, val);
1544 ibound = (u ? 0 : INT32_MIN);
1545 cond = TCG_COND_LT;
1546 } else {
1547 tcg_gen_add_i64(reg, reg, val);
1548 ibound = (u ? UINT32_MAX : INT32_MAX);
1549 cond = TCG_COND_GT;
1551 bound = tcg_const_i64(ibound);
1552 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1553 tcg_temp_free_i64(bound);
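/* E.g. an unsigned decrement of 5 by 16 yields -11 in the 64-bit
 * temporary, below the bound of 0, and so saturates to 0; a signed
 * increment of 0x7ffffff0 by 0x20 yields 0x80000010, above INT32_MAX,
 * and saturates to 0x7fffffff.
 */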
1556 /* Similarly with 64-bit values. */
1557 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1559 TCGv_i64 t0 = tcg_temp_new_i64();
1560 TCGv_i64 t1 = tcg_temp_new_i64();
1561 TCGv_i64 t2;
1563 if (u) {
1564 if (d) {
1565 tcg_gen_sub_i64(t0, reg, val);
1566 tcg_gen_movi_i64(t1, 0);
1567 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1568 } else {
1569 tcg_gen_add_i64(t0, reg, val);
1570 tcg_gen_movi_i64(t1, -1);
1571 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1573 } else {
1574 if (d) {
1575 /* Detect signed overflow for subtraction. */
1576 tcg_gen_xor_i64(t0, reg, val);
1577 tcg_gen_sub_i64(t1, reg, val);
1578 tcg_gen_xor_i64(reg, reg, t1);
1579 tcg_gen_and_i64(t0, t0, reg);
1581 /* Bound the result. */
1582 tcg_gen_movi_i64(reg, INT64_MIN);
1583 t2 = tcg_const_i64(0);
1584 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1585 } else {
1586 /* Detect signed overflow for addition. */
1587 tcg_gen_xor_i64(t0, reg, val);
1588 tcg_gen_add_i64(reg, reg, val);
1589 tcg_gen_xor_i64(t1, reg, val);
1590 tcg_gen_andc_i64(t0, t1, t0);
1592 /* Bound the result. */
1593 tcg_gen_movi_i64(t1, INT64_MAX);
1594 t2 = tcg_const_i64(0);
1595 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1597 tcg_temp_free_i64(t2);
1599 tcg_temp_free_i64(t0);
1600 tcg_temp_free_i64(t1);
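/* The signed cases use the usual xor trick: after the operation, t0 has
 * its sign bit set exactly when the operands' signs admitted an overflow
 * and the sign of the result differs from that of the original operand,
 * so the movcond selects the saturated bound in precisely those cases.
 */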
1603 /* Similarly with a vector and a scalar operand. */
1604 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1605 TCGv_i64 val, bool u, bool d)
1607 unsigned vsz = vec_full_reg_size(s);
1608 TCGv_ptr dptr, nptr;
1609 TCGv_i32 t32, desc;
1610 TCGv_i64 t64;
1612 dptr = tcg_temp_new_ptr();
1613 nptr = tcg_temp_new_ptr();
1614 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1615 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1616 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1618 switch (esz) {
1619 case MO_8:
1620 t32 = tcg_temp_new_i32();
1621 tcg_gen_extrl_i64_i32(t32, val);
1622 if (d) {
1623 tcg_gen_neg_i32(t32, t32);
1625 if (u) {
1626 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1627 } else {
1628 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1630 tcg_temp_free_i32(t32);
1631 break;
1633 case MO_16:
1634 t32 = tcg_temp_new_i32();
1635 tcg_gen_extrl_i64_i32(t32, val);
1636 if (d) {
1637 tcg_gen_neg_i32(t32, t32);
1639 if (u) {
1640 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1641 } else {
1642 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1644 tcg_temp_free_i32(t32);
1645 break;
1647 case MO_32:
1648 t64 = tcg_temp_new_i64();
1649 if (d) {
1650 tcg_gen_neg_i64(t64, val);
1651 } else {
1652 tcg_gen_mov_i64(t64, val);
1654 if (u) {
1655 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1656 } else {
1657 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1659 tcg_temp_free_i64(t64);
1660 break;
1662 case MO_64:
1663 if (u) {
1664 if (d) {
1665 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1666 } else {
1667 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1669 } else if (d) {
1670 t64 = tcg_temp_new_i64();
1671 tcg_gen_neg_i64(t64, val);
1672 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1673 tcg_temp_free_i64(t64);
1674 } else {
1675 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1677 break;
1679 default:
1680 g_assert_not_reached();
1683 tcg_temp_free_ptr(dptr);
1684 tcg_temp_free_ptr(nptr);
1685 tcg_temp_free_i32(desc);
1688 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1690 if (sve_access_check(s)) {
1691 unsigned fullsz = vec_full_reg_size(s);
1692 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1693 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1695 return true;
1698 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1700 if (sve_access_check(s)) {
1701 unsigned fullsz = vec_full_reg_size(s);
1702 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1703 int inc = numelem * a->imm * (a->d ? -1 : 1);
1704 TCGv_i64 reg = cpu_reg(s, a->rd);
1706 tcg_gen_addi_i64(reg, reg, inc);
1708 return true;
1711 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1712 uint32_t insn)
1714 if (!sve_access_check(s)) {
1715 return true;
1718 unsigned fullsz = vec_full_reg_size(s);
1719 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1720 int inc = numelem * a->imm;
1721 TCGv_i64 reg = cpu_reg(s, a->rd);
1723 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1724 if (inc == 0) {
1725 if (a->u) {
1726 tcg_gen_ext32u_i64(reg, reg);
1727 } else {
1728 tcg_gen_ext32s_i64(reg, reg);
1730 } else {
1731 TCGv_i64 t = tcg_const_i64(inc);
1732 do_sat_addsub_32(reg, t, a->u, a->d);
1733 tcg_temp_free_i64(t);
1735 return true;
1738 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1739 uint32_t insn)
1741 if (!sve_access_check(s)) {
1742 return true;
1745 unsigned fullsz = vec_full_reg_size(s);
1746 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1747 int inc = numelem * a->imm;
1748 TCGv_i64 reg = cpu_reg(s, a->rd);
1750 if (inc != 0) {
1751 TCGv_i64 t = tcg_const_i64(inc);
1752 do_sat_addsub_64(reg, t, a->u, a->d);
1753 tcg_temp_free_i64(t);
1755 return true;
1758 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1760 if (a->esz == 0) {
1761 return false;
1764 unsigned fullsz = vec_full_reg_size(s);
1765 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1766 int inc = numelem * a->imm;
1768 if (inc != 0) {
1769 if (sve_access_check(s)) {
1770 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1771 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1772 vec_full_reg_offset(s, a->rn),
1773 t, fullsz, fullsz);
1774 tcg_temp_free_i64(t);
1776 } else {
1777 do_mov_z(s, a->rd, a->rn);
1779 return true;
1782 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1783 uint32_t insn)
1785 if (a->esz == 0) {
1786 return false;
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1802 return true;
1806 *** SVE Bitwise Immediate Group
1809 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1822 return true;
1825 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1830 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1835 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1840 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1851 return true;
1855 *** SVE Integer Wide Immediate - Predicated Group
1858 /* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1861 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1887 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1889 if (a->esz == 0) {
1890 return false;
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1899 return true;
1902 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1904 if (a->esz == 0 && extract32(insn, 13, 1)) {
1905 return false;
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1912 return true;
1915 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1922 if (a->esz == 0 && extract32(insn, 13, 1)) {
1923 return false;
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1933 return true;
1937 *** SVE Permute Extract Group
1940 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1942 if (!sve_access_check(s)) {
1943 return true;
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1967 return true;
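/* E.g. with a 32-byte vector, EXT #16 gives n_ofs == n_siz == 16, both
 * sizes the gvec infrastructure accepts, so (absent awkward register
 * overlap) the result is assembled from two in-place vector moves; an
 * immediate such as #3 falls back to the out-of-line helper.
 */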
1971 *** SVE Permute - Unpredicated Group
1974 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1981 return true;
1984 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
2001 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2004 return true;
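/* The immediate encodes both element size and index: e.g. imm == 0b1110
 * gives ctz32 == 1, hence MO_16 elements and index 3.  The dup is done
 * from that element when it lies within the current vector length;
 * otherwise the destination is zeroed.
 */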
2007 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2009 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2010 static gen_insr * const fns[4] = {
2011 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2012 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2014 unsigned vsz = vec_full_reg_size(s);
2015 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2016 TCGv_ptr t_zd = tcg_temp_new_ptr();
2017 TCGv_ptr t_zn = tcg_temp_new_ptr();
2019 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2020 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2022 fns[a->esz](t_zd, t_zn, val, desc);
2024 tcg_temp_free_ptr(t_zd);
2025 tcg_temp_free_ptr(t_zn);
2026 tcg_temp_free_i32(desc);
2029 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2031 if (sve_access_check(s)) {
2032 TCGv_i64 t = tcg_temp_new_i64();
2033 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2034 do_insr_i64(s, a, t);
2035 tcg_temp_free_i64(t);
2037 return true;
2040 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2042 if (sve_access_check(s)) {
2043 do_insr_i64(s, a, cpu_reg(s, a->rm));
2045 return true;
2048 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2050 static gen_helper_gvec_2 * const fns[4] = {
2051 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2052 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2055 if (sve_access_check(s)) {
2056 unsigned vsz = vec_full_reg_size(s);
2057 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2058 vec_full_reg_offset(s, a->rn),
2059 vsz, vsz, 0, fns[a->esz]);
2061 return true;
2064 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2066 static gen_helper_gvec_3 * const fns[4] = {
2067 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2068 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2071 if (sve_access_check(s)) {
2072 unsigned vsz = vec_full_reg_size(s);
2073 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2074 vec_full_reg_offset(s, a->rn),
2075 vec_full_reg_offset(s, a->rm),
2076 vsz, vsz, 0, fns[a->esz]);
2078 return true;
2081 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2083 static gen_helper_gvec_2 * const fns[4][2] = {
2084 { NULL, NULL },
2085 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2086 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2087 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2090 if (a->esz == 0) {
2091 return false;
2093 if (sve_access_check(s)) {
2094 unsigned vsz = vec_full_reg_size(s);
2095 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2096 vec_full_reg_offset(s, a->rn)
2097 + (a->h ? vsz / 2 : 0),
2098 vsz, vsz, 0, fns[a->esz][a->u]);
2100 return true;
2104 *** SVE Permute - Predicates Group
2107 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2108 gen_helper_gvec_3 *fn)
2110 if (!sve_access_check(s)) {
2111 return true;
2114 unsigned vsz = pred_full_reg_size(s);
2116 /* Predicate sizes may be smaller and cannot use simd_desc.
2117 We cannot round up, as we do elsewhere, because we need
2118 the exact size for ZIP2 and REV. We retain the style for
2119 the other helpers for consistency. */
2120 TCGv_ptr t_d = tcg_temp_new_ptr();
2121 TCGv_ptr t_n = tcg_temp_new_ptr();
2122 TCGv_ptr t_m = tcg_temp_new_ptr();
2123 TCGv_i32 t_desc;
2124 int desc;
2126 desc = vsz - 2;
2127 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2128 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2130 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2131 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2132 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2133 t_desc = tcg_const_i32(desc);
2135 fn(t_d, t_n, t_m, t_desc);
2137 tcg_temp_free_ptr(t_d);
2138 tcg_temp_free_ptr(t_n);
2139 tcg_temp_free_ptr(t_m);
2140 tcg_temp_free_i32(t_desc);
2141 return true;
2144 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2145 gen_helper_gvec_2 *fn)
2147 if (!sve_access_check(s)) {
2148 return true;
2151 unsigned vsz = pred_full_reg_size(s);
2152 TCGv_ptr t_d = tcg_temp_new_ptr();
2153 TCGv_ptr t_n = tcg_temp_new_ptr();
2154 TCGv_i32 t_desc;
2155 int desc;
2157 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2158 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2160 /* Predicate sizes may be smaller and cannot use simd_desc.
2161 We cannot round up, as we do elsewhere, because we need
2162 the exact size for ZIP2 and REV. We retain the style for
2163 the other helpers for consistency. */
2165 desc = vsz - 2;
2166 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2167 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2168 t_desc = tcg_const_i32(desc);
2170 fn(t_d, t_n, t_desc);
2172 tcg_temp_free_i32(t_desc);
2173 tcg_temp_free_ptr(t_d);
2174 tcg_temp_free_ptr(t_n);
2175 return true;
2178 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2180 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2183 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2185 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2188 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2190 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2193 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2198 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2203 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2208 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2210 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2213 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2215 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2218 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2220 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2224 *** SVE Permute - Interleaving Group
2227 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2229 static gen_helper_gvec_3 * const fns[4] = {
2230 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2231 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
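/* ZIP1 interleaves elements taken from the low halves of Zn and Zm;
 * ZIP2 uses the high halves, hence the extra vsz / 2 offset applied
 * to both source operands below.
 */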
2234 if (sve_access_check(s)) {
2235 unsigned vsz = vec_full_reg_size(s);
2236 unsigned high_ofs = high ? vsz / 2 : 0;
2237 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2238 vec_full_reg_offset(s, a->rn) + high_ofs,
2239 vec_full_reg_offset(s, a->rm) + high_ofs,
2240 vsz, vsz, 0, fns[a->esz]);
2242 return true;
2245 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2246 gen_helper_gvec_3 *fn)
2248 if (sve_access_check(s)) {
2249 unsigned vsz = vec_full_reg_size(s);
2250 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2251 vec_full_reg_offset(s, a->rn),
2252 vec_full_reg_offset(s, a->rm),
2253 vsz, vsz, data, fn);
2255 return true;
2258 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2260 return do_zip(s, a, false);
2263 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2265 return do_zip(s, a, true);
2268 static gen_helper_gvec_3 * const uzp_fns[4] = {
2269 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2270 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
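/* UZP1 passes data == 0 and UZP2 passes 1 << esz, which the helper
 * uses as a byte offset so that extraction starts with the odd-numbered
 * elements.  TRN1/TRN2 below follow the same convention.
 */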
2273 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2275 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2278 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2283 static gen_helper_gvec_3 * const trn_fns[4] = {
2284 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2285 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2288 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2290 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2293 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2299 *** SVE Permute Vector - Predicated Group
2302 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2304 static gen_helper_gvec_3 * const fns[4] = {
2305 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2307 return do_zpz_ool(s, a, fns[a->esz]);
2310 /* Call the helper that computes the ARM LastActiveElement pseudocode
2311 * function, scaled by the element size. This includes the not found
2312 * indication; e.g. not found for esz=3 is -8.
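 * (Likewise, an active last element at index 3 with esz=2 yields 12.)
 */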
2314 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2316 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2317 * round up, as we do elsewhere, because we need the exact size.
2319 TCGv_ptr t_p = tcg_temp_new_ptr();
2320 TCGv_i32 t_desc;
2321 unsigned vsz = pred_full_reg_size(s);
2322 unsigned desc;
2324 desc = vsz - 2;
2325 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2327 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2328 t_desc = tcg_const_i32(desc);
2330 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2332 tcg_temp_free_i32(t_desc);
2333 tcg_temp_free_ptr(t_p);
2336 /* Increment LAST to the offset of the next element in the vector,
2337 * wrapping around to 0.
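 * E.g. with vsz = 48 (a 384-bit vector) and esz = 3, LAST = 40
 * advances to 48 and the movcond below folds it back to 0.
 */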
2339 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2341 unsigned vsz = vec_full_reg_size(s);
2343 tcg_gen_addi_i32(last, last, 1 << esz);
2344 if (is_power_of_2(vsz)) {
2345 tcg_gen_andi_i32(last, last, vsz - 1);
2346 } else {
2347 TCGv_i32 max = tcg_const_i32(vsz);
2348 TCGv_i32 zero = tcg_const_i32(0);
2349 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2350 tcg_temp_free_i32(max);
2351 tcg_temp_free_i32(zero);
2355 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2356 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2358 unsigned vsz = vec_full_reg_size(s);
2360 if (is_power_of_2(vsz)) {
2361 tcg_gen_andi_i32(last, last, vsz - 1);
2362 } else {
2363 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2364 TCGv_i32 zero = tcg_const_i32(0);
2365 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2366 tcg_temp_free_i32(max);
2367 tcg_temp_free_i32(zero);
2371 /* Load an unsigned element of ESZ from BASE+OFS. */
2372 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2374 TCGv_i64 r = tcg_temp_new_i64();
2376 switch (esz) {
2377 case 0:
2378 tcg_gen_ld8u_i64(r, base, ofs);
2379 break;
2380 case 1:
2381 tcg_gen_ld16u_i64(r, base, ofs);
2382 break;
2383 case 2:
2384 tcg_gen_ld32u_i64(r, base, ofs);
2385 break;
2386 case 3:
2387 tcg_gen_ld_i64(r, base, ofs);
2388 break;
2389 default:
2390 g_assert_not_reached();
2392 return r;
2395 /* Load an unsigned element of ESZ from RM[LAST]. */
2396 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2397 int rm, int esz)
2399 TCGv_ptr p = tcg_temp_new_ptr();
2400 TCGv_i64 r;
2402 /* Convert the offset within the vector into an offset within ENV.
2403 * The final adjustment for the vector register base
2404 * is added as a constant offset to the load.
2406 #ifdef HOST_WORDS_BIGENDIAN
2407 /* Adjust for element ordering. See vec_reg_offset. */
2408 if (esz < 3) {
2409 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2411 #endif
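/* On a big-endian host the 64-bit doublewords of the Z register are
 * stored in architectural order, but the bytes within each doubleword
 * are in host order; XORing the byte offset with (8 - esize) mirrors
 * the element position within its doubleword, matching vec_reg_offset.
 */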
2412 tcg_gen_ext_i32_ptr(p, last);
2413 tcg_gen_add_ptr(p, p, cpu_env);
2415 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2416 tcg_temp_free_ptr(p);
2418 return r;
2421 /* Compute CLAST for a Zreg. */
2422 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2424 TCGv_i32 last;
2425 TCGLabel *over;
2426 TCGv_i64 ele;
2427 unsigned vsz, esz = a->esz;
2429 if (!sve_access_check(s)) {
2430 return true;
2433 last = tcg_temp_local_new_i32();
2434 over = gen_new_label();
2436 find_last_active(s, last, esz, a->pg);
2438 /* There is of course no movcond for a 2048-bit vector,
2439 * so we must branch over the actual store.
2441 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2443 if (!before) {
2444 incr_last_active(s, last, esz);
2447 ele = load_last_active(s, last, a->rm, esz);
2448 tcg_temp_free_i32(last);
2450 vsz = vec_full_reg_size(s);
2451 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2452 tcg_temp_free_i64(ele);
2454 /* If this insn used MOVPRFX, we may need a second move. */
2455 if (a->rd != a->rn) {
2456 TCGLabel *done = gen_new_label();
2457 tcg_gen_br(done);
2459 gen_set_label(over);
2460 do_mov_z(s, a->rd, a->rn);
2462 gen_set_label(done);
2463 } else {
2464 gen_set_label(over);
2466 return true;
2469 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2471 return do_clast_vector(s, a, false);
2474 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2476 return do_clast_vector(s, a, true);
2479 /* Compute CLAST for a scalar. */
2480 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2481 bool before, TCGv_i64 reg_val)
2483 TCGv_i32 last = tcg_temp_new_i32();
2484 TCGv_i64 ele, cmp, zero;
2486 find_last_active(s, last, esz, pg);
2488 /* Extend the original value of last prior to incrementing. */
2489 cmp = tcg_temp_new_i64();
2490 tcg_gen_ext_i32_i64(cmp, last);
2492 if (!before) {
2493 incr_last_active(s, last, esz);
2496 /* The conceit here is that while last < 0 indicates not found, after
2497 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2498 * from which we can load garbage. We then discard the garbage with
2499 * a conditional move.
2501 ele = load_last_active(s, last, rm, esz);
2502 tcg_temp_free_i32(last);
2504 zero = tcg_const_i64(0);
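/* CMP holds the pre-increment LAST, which is >= 0 exactly when an
 * active element was found; otherwise the movcond below keeps the
 * old REG_VAL and the garbage load is discarded.
 */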
2505 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2507 tcg_temp_free_i64(zero);
2508 tcg_temp_free_i64(cmp);
2509 tcg_temp_free_i64(ele);
2512 /* Compute CLAST for a Vreg. */
2513 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2515 if (sve_access_check(s)) {
2516 int esz = a->esz;
2517 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2518 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2520 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2521 write_fp_dreg(s, a->rd, reg);
2522 tcg_temp_free_i64(reg);
2524 return true;
2527 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2529 return do_clast_fp(s, a, false);
2532 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2534 return do_clast_fp(s, a, true);
2537 /* Compute CLAST for a Xreg. */
2538 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2540 TCGv_i64 reg;
2542 if (!sve_access_check(s)) {
2543 return true;
2546 reg = cpu_reg(s, a->rd);
2547 switch (a->esz) {
2548 case 0:
2549 tcg_gen_ext8u_i64(reg, reg);
2550 break;
2551 case 1:
2552 tcg_gen_ext16u_i64(reg, reg);
2553 break;
2554 case 2:
2555 tcg_gen_ext32u_i64(reg, reg);
2556 break;
2557 case 3:
2558 break;
2559 default:
2560 g_assert_not_reached();
2563 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2564 return true;
2567 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2569 return do_clast_general(s, a, false);
2572 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2574 return do_clast_general(s, a, true);
2577 /* Compute LAST for a scalar. */
2578 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2579 int pg, int rm, bool before)
2581 TCGv_i32 last = tcg_temp_new_i32();
2582 TCGv_i64 ret;
2584 find_last_active(s, last, esz, pg);
2585 if (before) {
2586 wrap_last_active(s, last, esz);
2587 } else {
2588 incr_last_active(s, last, esz);
2591 ret = load_last_active(s, last, rm, esz);
2592 tcg_temp_free_i32(last);
2593 return ret;
2596 /* Compute LAST for a Vreg. */
2597 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2599 if (sve_access_check(s)) {
2600 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2601 write_fp_dreg(s, a->rd, val);
2602 tcg_temp_free_i64(val);
2604 return true;
2607 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2609 return do_last_fp(s, a, false);
2612 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2614 return do_last_fp(s, a, true);
2617 /* Compute LAST for a Xreg. */
2618 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2620 if (sve_access_check(s)) {
2621 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2622 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2623 tcg_temp_free_i64(val);
2625 return true;
2628 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2630 return do_last_general(s, a, false);
2633 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2635 return do_last_general(s, a, true);
2638 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640 if (sve_access_check(s)) {
2641 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2643 return true;
2646 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2648 if (sve_access_check(s)) {
2649 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2650 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2651 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2652 tcg_temp_free_i64(t);
2654 return true;
2657 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2659 static gen_helper_gvec_3 * const fns[4] = {
2660 NULL,
2661 gen_helper_sve_revb_h,
2662 gen_helper_sve_revb_s,
2663 gen_helper_sve_revb_d,
2665 return do_zpz_ool(s, a, fns[a->esz]);
2668 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2670 static gen_helper_gvec_3 * const fns[4] = {
2671 NULL,
2672 NULL,
2673 gen_helper_sve_revh_s,
2674 gen_helper_sve_revh_d,
2676 return do_zpz_ool(s, a, fns[a->esz]);
2679 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2681 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2684 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 gen_helper_sve_rbit_b,
2688 gen_helper_sve_rbit_h,
2689 gen_helper_sve_rbit_s,
2690 gen_helper_sve_rbit_d,
2692 return do_zpz_ool(s, a, fns[a->esz]);
2695 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2697 if (sve_access_check(s)) {
2698 unsigned vsz = vec_full_reg_size(s);
2699 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2700 vec_full_reg_offset(s, a->rn),
2701 vec_full_reg_offset(s, a->rm),
2702 pred_full_reg_offset(s, a->pg),
2703 vsz, vsz, a->esz, gen_helper_sve_splice);
2705 return true;
2709 *** SVE Integer Compare - Vectors Group
2712 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2713 gen_helper_gvec_flags_4 *gen_fn)
2715 TCGv_ptr pd, zn, zm, pg;
2716 unsigned vsz;
2717 TCGv_i32 t;
2719 if (gen_fn == NULL) {
2720 return false;
2722 if (!sve_access_check(s)) {
2723 return true;
2726 vsz = vec_full_reg_size(s);
2727 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
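/* T does double duty: it passes the simd_desc to the helper and
 * receives the resulting NZCV flags, which do_pred_flags then
 * moves into the PSTATE flags.
 */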
2728 pd = tcg_temp_new_ptr();
2729 zn = tcg_temp_new_ptr();
2730 zm = tcg_temp_new_ptr();
2731 pg = tcg_temp_new_ptr();
2733 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2734 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2735 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2736 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2738 gen_fn(t, pd, zn, zm, pg, t);
2740 tcg_temp_free_ptr(pd);
2741 tcg_temp_free_ptr(zn);
2742 tcg_temp_free_ptr(zm);
2743 tcg_temp_free_ptr(pg);
2745 do_pred_flags(t);
2747 tcg_temp_free_i32(t);
2748 return true;
2751 #define DO_PPZZ(NAME, name) \
2752 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2753 uint32_t insn) \
2755 static gen_helper_gvec_flags_4 * const fns[4] = { \
2756 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2757 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2758 }; \
2759 return do_ppzz_flags(s, a, fns[a->esz]); \
2762 DO_PPZZ(CMPEQ, cmpeq)
2763 DO_PPZZ(CMPNE, cmpne)
2764 DO_PPZZ(CMPGT, cmpgt)
2765 DO_PPZZ(CMPGE, cmpge)
2766 DO_PPZZ(CMPHI, cmphi)
2767 DO_PPZZ(CMPHS, cmphs)
2769 #undef DO_PPZZ
2771 #define DO_PPZW(NAME, name) \
2772 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2773 uint32_t insn) \
2775 static gen_helper_gvec_flags_4 * const fns[4] = { \
2776 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2777 gen_helper_sve_##name##_ppzw_s, NULL \
2778 }; \
2779 return do_ppzz_flags(s, a, fns[a->esz]); \
2782 DO_PPZW(CMPEQ, cmpeq)
2783 DO_PPZW(CMPNE, cmpne)
2784 DO_PPZW(CMPGT, cmpgt)
2785 DO_PPZW(CMPGE, cmpge)
2786 DO_PPZW(CMPHI, cmphi)
2787 DO_PPZW(CMPHS, cmphs)
2788 DO_PPZW(CMPLT, cmplt)
2789 DO_PPZW(CMPLE, cmple)
2790 DO_PPZW(CMPLO, cmplo)
2791 DO_PPZW(CMPLS, cmpls)
2793 #undef DO_PPZW
2796 *** SVE Integer Compare - Immediate Groups
2799 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2800 gen_helper_gvec_flags_3 *gen_fn)
2802 TCGv_ptr pd, zn, pg;
2803 unsigned vsz;
2804 TCGv_i32 t;
2806 if (gen_fn == NULL) {
2807 return false;
2809 if (!sve_access_check(s)) {
2810 return true;
2813 vsz = vec_full_reg_size(s);
2814 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2815 pd = tcg_temp_new_ptr();
2816 zn = tcg_temp_new_ptr();
2817 pg = tcg_temp_new_ptr();
2819 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2820 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2821 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2823 gen_fn(t, pd, zn, pg, t);
2825 tcg_temp_free_ptr(pd);
2826 tcg_temp_free_ptr(zn);
2827 tcg_temp_free_ptr(pg);
2829 do_pred_flags(t);
2831 tcg_temp_free_i32(t);
2832 return true;
2835 #define DO_PPZI(NAME, name) \
2836 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2837 uint32_t insn) \
2839 static gen_helper_gvec_flags_3 * const fns[4] = { \
2840 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2841 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2842 }; \
2843 return do_ppzi_flags(s, a, fns[a->esz]); \
2846 DO_PPZI(CMPEQ, cmpeq)
2847 DO_PPZI(CMPNE, cmpne)
2848 DO_PPZI(CMPGT, cmpgt)
2849 DO_PPZI(CMPGE, cmpge)
2850 DO_PPZI(CMPHI, cmphi)
2851 DO_PPZI(CMPHS, cmphs)
2852 DO_PPZI(CMPLT, cmplt)
2853 DO_PPZI(CMPLE, cmple)
2854 DO_PPZI(CMPLO, cmplo)
2855 DO_PPZI(CMPLS, cmpls)
2857 #undef DO_PPZI
2860 *** SVE Partition Break Group
2863 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2864 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2866 if (!sve_access_check(s)) {
2867 return true;
2870 unsigned vsz = pred_full_reg_size(s);
2872 /* Predicate sizes may be smaller and cannot use simd_desc. */
2873 TCGv_ptr d = tcg_temp_new_ptr();
2874 TCGv_ptr n = tcg_temp_new_ptr();
2875 TCGv_ptr m = tcg_temp_new_ptr();
2876 TCGv_ptr g = tcg_temp_new_ptr();
2877 TCGv_i32 t = tcg_const_i32(vsz - 2);
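/* As with the other predicate ops, T carries the raw predicate size
 * (minus 2) in place of a simd_desc; for the flag-setting variants
 * it also returns the NZCV result.
 */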
2879 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2880 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2881 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2882 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2884 if (a->s) {
2885 fn_s(t, d, n, m, g, t);
2886 do_pred_flags(t);
2887 } else {
2888 fn(d, n, m, g, t);
2890 tcg_temp_free_ptr(d);
2891 tcg_temp_free_ptr(n);
2892 tcg_temp_free_ptr(m);
2893 tcg_temp_free_ptr(g);
2894 tcg_temp_free_i32(t);
2895 return true;
2898 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2899 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2901 if (!sve_access_check(s)) {
2902 return true;
2905 unsigned vsz = pred_full_reg_size(s);
2907 /* Predicate sizes may be smaller and cannot use simd_desc. */
2908 TCGv_ptr d = tcg_temp_new_ptr();
2909 TCGv_ptr n = tcg_temp_new_ptr();
2910 TCGv_ptr g = tcg_temp_new_ptr();
2911 TCGv_i32 t = tcg_const_i32(vsz - 2);
2913 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2914 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2915 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2917 if (a->s) {
2918 fn_s(t, d, n, g, t);
2919 do_pred_flags(t);
2920 } else {
2921 fn(d, n, g, t);
2923 tcg_temp_free_ptr(d);
2924 tcg_temp_free_ptr(n);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2927 return true;
2930 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2932 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2935 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2937 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2940 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2942 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2945 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2947 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2950 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2952 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2955 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2960 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2966 *** SVE Predicate Count Group
2969 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2971 unsigned psz = pred_full_reg_size(s);
2973 if (psz <= 8) {
2974 uint64_t psz_mask;
2976 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2977 if (pn != pg) {
2978 TCGv_i64 g = tcg_temp_new_i64();
2979 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2980 tcg_gen_and_i64(val, val, g);
2981 tcg_temp_free_i64(g);
2984 /* Reduce the pred_esz_masks value simply to reduce the
2985 * size of the code generated here.
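 * E.g. for esz == 2, pred_esz_masks[esz] is 0x1111111111111111
 * (one predicate bit per 32-bit element); ANDing with psz_mask
 * below clips that to the real predicate length before the popcount.
 */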
2987 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2988 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2990 tcg_gen_ctpop_i64(val, val);
2991 } else {
2992 TCGv_ptr t_pn = tcg_temp_new_ptr();
2993 TCGv_ptr t_pg = tcg_temp_new_ptr();
2994 unsigned desc;
2995 TCGv_i32 t_desc;
2997 desc = psz - 2;
2998 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3000 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3001 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3002 t_desc = tcg_const_i32(desc);
3004 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3005 tcg_temp_free_ptr(t_pn);
3006 tcg_temp_free_ptr(t_pg);
3007 tcg_temp_free_i32(t_desc);
3011 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3013 if (sve_access_check(s)) {
3014 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3016 return true;
3019 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3020 uint32_t insn)
3022 if (sve_access_check(s)) {
3023 TCGv_i64 reg = cpu_reg(s, a->rd);
3024 TCGv_i64 val = tcg_temp_new_i64();
3026 do_cntp(s, val, a->esz, a->pg, a->pg);
3027 if (a->d) {
3028 tcg_gen_sub_i64(reg, reg, val);
3029 } else {
3030 tcg_gen_add_i64(reg, reg, val);
3032 tcg_temp_free_i64(val);
3034 return true;
3037 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3038 uint32_t insn)
3040 if (a->esz == 0) {
3041 return false;
3043 if (sve_access_check(s)) {
3044 unsigned vsz = vec_full_reg_size(s);
3045 TCGv_i64 val = tcg_temp_new_i64();
3046 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3048 do_cntp(s, val, a->esz, a->pg, a->pg);
3049 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3050 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3052 return true;
3055 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3056 uint32_t insn)
3058 if (sve_access_check(s)) {
3059 TCGv_i64 reg = cpu_reg(s, a->rd);
3060 TCGv_i64 val = tcg_temp_new_i64();
3062 do_cntp(s, val, a->esz, a->pg, a->pg);
3063 do_sat_addsub_32(reg, val, a->u, a->d);
3065 return true;
3068 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3069 uint32_t insn)
3071 if (sve_access_check(s)) {
3072 TCGv_i64 reg = cpu_reg(s, a->rd);
3073 TCGv_i64 val = tcg_temp_new_i64();
3075 do_cntp(s, val, a->esz, a->pg, a->pg);
3076 do_sat_addsub_64(reg, val, a->u, a->d);
3078 return true;
3081 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3082 uint32_t insn)
3084 if (a->esz == 0) {
3085 return false;
3087 if (sve_access_check(s)) {
3088 TCGv_i64 val = tcg_temp_new_i64();
3089 do_cntp(s, val, a->esz, a->pg, a->pg);
3090 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3092 return true;
3096 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3099 /* Subroutine loading a vector register at VOFS of LEN bytes.
3100 * The load should begin at the address Rn + IMM.
3103 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3104 int rn, int imm)
3106 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3107 uint32_t len_remain = len % 8;
3108 uint32_t nparts = len / 8 + ctpop8(len_remain);
3109 int midx = get_mem_index(s);
3110 TCGv_i64 addr, t0, t1;
3112 addr = tcg_temp_new_i64();
3113 t0 = tcg_temp_new_i64();
3115 /* Note that unpredicated loads and stores of vector/predicate registers
3116 * are defined as a stream of bytes, which equates to little-endian
3117 * operations on larger quantities. There is no nice way to force
3118 * a little-endian load for aarch64_be-linux-user out of line.
3120 * Attempt to keep code expansion to a minimum by limiting the
3121 * amount of unrolling done.
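 *
 * E.g. a 256-bit VL gives len = 32 for the vector form, so nparts = 4
 * and the loads are fully unrolled; a 2048-bit VL gives nparts = 32
 * and takes the loop instead.
 */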
3123 if (nparts <= 4) {
3124 int i;
3126 for (i = 0; i < len_align; i += 8) {
3127 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3128 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3129 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3131 } else {
3132 TCGLabel *loop = gen_new_label();
3133 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3135 gen_set_label(loop);
3137 /* Minimize the number of local temps that must be re-read from
3138 * the stack each iteration. Instead, re-compute values other
3139 * than the loop counter.
3141 tp = tcg_temp_new_ptr();
3142 tcg_gen_addi_ptr(tp, i, imm);
3143 tcg_gen_extu_ptr_i64(addr, tp);
3144 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3146 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3148 tcg_gen_add_ptr(tp, cpu_env, i);
3149 tcg_gen_addi_ptr(i, i, 8);
3150 tcg_gen_st_i64(t0, tp, vofs);
3151 tcg_temp_free_ptr(tp);
3153 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3154 tcg_temp_free_ptr(i);
3157 /* Predicate register loads can be any multiple of 2 bytes.
3158 * Note that we still store the entire 64-bit unit into cpu_env.
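 * E.g. a 384-bit VL gives a 6-byte predicate, so len_remain == 6 and
 * the value is assembled from a 4-byte and a 2-byte load below.
 */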
3160 if (len_remain) {
3161 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3163 switch (len_remain) {
3164 case 2:
3165 case 4:
3166 case 8:
3167 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3168 break;
3170 case 6:
3171 t1 = tcg_temp_new_i64();
3172 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3173 tcg_gen_addi_i64(addr, addr, 4);
3174 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3175 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3176 tcg_temp_free_i64(t1);
3177 break;
3179 default:
3180 g_assert_not_reached();
3182 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3184 tcg_temp_free_i64(addr);
3185 tcg_temp_free_i64(t0);
3188 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3190 if (sve_access_check(s)) {
3191 int size = vec_full_reg_size(s);
3192 int off = vec_full_reg_offset(s, a->rd);
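/* The immediate is scaled by the register size in bytes, i.e. the
 * "[<Xn|SP>{, #<imm>, MUL VL}]" addressing form; the same applies
 * to the predicate variant below.
 */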
3193 do_ldr(s, off, size, a->rn, a->imm * size);
3195 return true;
3198 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3200 if (sve_access_check(s)) {
3201 int size = pred_full_reg_size(s);
3202 int off = pred_full_reg_offset(s, a->rd);
3203 do_ldr(s, off, size, a->rn, a->imm * size);
3205 return true;