target/arm: Implement SVE Floating Point Arithmetic - Unpredicated Group
[qemu.git] / target/arm/translate-sve.c
blob 226c97579ce919310973e9788ce386b3cb444071
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
46 * Helpers for extracting complex instruction fields.
49 /* See e.g. ASR (immediate, predicated).
50 * Returns -1 for unallocated encoding; diagnose later.
52 static int tszimm_esz(int x)
54 x >>= 3; /* discard imm3 */
55 return 31 - clz32(x);
58 static int tszimm_shr(int x)
60 return (16 << tszimm_esz(x)) - x;
63 /* See e.g. LSL (immediate, predicated). */
64 static int tszimm_shl(int x)
66 return x - (8 << tszimm_esz(x));
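/* Worked example of the tsz:imm encoding decoded by the three helpers above
 * (illustrative; assumes the 7-bit tsz(4):imm3(3) field assembled by the
 * decode file):
 *   tsz = 0b0001 -> esz 0 (bytes),     encoded x ranges  8..15
 *   tsz = 0b001x -> esz 1 (halfwords), encoded x ranges 16..31
 *   tsz = 0b01xx -> esz 2, tsz = 0b1xxx -> esz 3, tsz = 0 -> esz -1 (unallocated)
 * For bytes, tszimm_shr gives a right-shift count of 16 - x (8 down to 1)
 * and tszimm_shl gives a left-shift count of x - 8 (0 up to 7).
 */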
69 static inline int plus1(int x)
71 return x + 1;
74 /* The SH bit is in bit 8. Extract the low 8 and shift. */
75 static inline int expand_imm_sh8s(int x)
77 return (int8_t)x << (x & 0x100 ? 8 : 0);
80 static inline int expand_imm_sh8u(int x)
82 return (uint8_t)x << (x & 0x100 ? 8 : 0);
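/* For example (illustrative): expand_imm_sh8s(0x0ff) = (int8_t)0xff = -1
 * with no shift, expand_imm_sh8s(0x101) = 0x01 << 8 = 256, and
 * expand_imm_sh8u(0x1ff) = 0xff << 8 = 0xff00.
 */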
86 * Include the generated decoder.
89 #include "decode-sve.inc.c"
92 * Implement all of the translator functions referenced by the decoder.
95 /* Return the offset into CPUARMState of the predicate vector register Pn.
96 * Note for this purpose, FFR is P16.
98 static inline int pred_full_reg_offset(DisasContext *s, int regno)
100 return offsetof(CPUARMState, vfp.pregs[regno]);
103 /* Return the byte size of the whole predicate register, VL / 64. */
104 static inline int pred_full_reg_size(DisasContext *s)
106 return s->sve_len >> 3;
109 /* Round up the size of a register to a size allowed by
110 * the tcg vector infrastructure. Any operation which uses this
111 * size may assume that the bits above pred_full_reg_size are zero,
112 * and must leave them the same way.
114 * Note that this is not needed for the vector registers as they
115 * are always properly sized for tcg vectors.
117 static int size_for_gvec(int size)
119 if (size <= 8) {
120 return 8;
121 } else {
122 return QEMU_ALIGN_UP(size, 16);
126 static int pred_gvec_reg_size(DisasContext *s)
128 return size_for_gvec(pred_full_reg_size(s));
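/* E.g. (illustrative): a 128-bit vector has a 2-byte predicate register,
 * which size_for_gvec rounds up to the 8-byte minimum; a 2048-bit vector
 * has a 32-byte predicate register, already a multiple of 16, so it is
 * used as-is.
 */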
131 /* Invoke a vector expander on two Zregs. */
132 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
133 int esz, int rd, int rn)
135 if (sve_access_check(s)) {
136 unsigned vsz = vec_full_reg_size(s);
137 gvec_fn(esz, vec_full_reg_offset(s, rd),
138 vec_full_reg_offset(s, rn), vsz, vsz);
140 return true;
143 /* Invoke a vector expander on three Zregs. */
144 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
145 int esz, int rd, int rn, int rm)
147 if (sve_access_check(s)) {
148 unsigned vsz = vec_full_reg_size(s);
149 gvec_fn(esz, vec_full_reg_offset(s, rd),
150 vec_full_reg_offset(s, rn),
151 vec_full_reg_offset(s, rm), vsz, vsz);
153 return true;
156 /* Invoke a vector move on two Zregs. */
157 static bool do_mov_z(DisasContext *s, int rd, int rn)
159 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
162 /* Initialize a Zreg with replications of a 64-bit immediate. */
163 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
165 unsigned vsz = vec_full_reg_size(s);
166 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
169 /* Invoke a vector expander on two Pregs. */
170 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
171 int esz, int rd, int rn)
173 if (sve_access_check(s)) {
174 unsigned psz = pred_gvec_reg_size(s);
175 gvec_fn(esz, pred_full_reg_offset(s, rd),
176 pred_full_reg_offset(s, rn), psz, psz);
178 return true;
181 /* Invoke a vector expander on three Pregs. */
182 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
183 int esz, int rd, int rn, int rm)
185 if (sve_access_check(s)) {
186 unsigned psz = pred_gvec_reg_size(s);
187 gvec_fn(esz, pred_full_reg_offset(s, rd),
188 pred_full_reg_offset(s, rn),
189 pred_full_reg_offset(s, rm), psz, psz);
191 return true;
194 /* Invoke a vector operation on four Pregs. */
195 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
196 int rd, int rn, int rm, int rg)
198 if (sve_access_check(s)) {
199 unsigned psz = pred_gvec_reg_size(s);
200 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
201 pred_full_reg_offset(s, rn),
202 pred_full_reg_offset(s, rm),
203 pred_full_reg_offset(s, rg),
204 psz, psz, gvec_op);
206 return true;
209 /* Invoke a vector move on two Pregs. */
210 static bool do_mov_p(DisasContext *s, int rd, int rn)
212 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
215 /* Set the cpu flags as per a return from an SVE helper. */
216 static void do_pred_flags(TCGv_i32 t)
218 tcg_gen_mov_i32(cpu_NF, t);
219 tcg_gen_andi_i32(cpu_ZF, t, 2);
220 tcg_gen_andi_i32(cpu_CF, t, 1);
221 tcg_gen_movi_i32(cpu_VF, 0);
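/* A sketch of the packing assumed here (illustrative): the helper returns
 * a word whose bit 31 is the PredTest N flag, whose bit 1 is clear only
 * when Z should be set (QEMU's cpu_ZF encodes Z as "value == 0"), and
 * whose bit 0 is the C flag; V is always zero for PTEST.
 */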
224 /* Subroutines computing the ARM PredTest pseudofunction. */
225 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
227 TCGv_i32 t = tcg_temp_new_i32();
229 gen_helper_sve_predtest1(t, d, g);
230 do_pred_flags(t);
231 tcg_temp_free_i32(t);
234 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
236 TCGv_ptr dptr = tcg_temp_new_ptr();
237 TCGv_ptr gptr = tcg_temp_new_ptr();
238 TCGv_i32 t;
240 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
241 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
242 t = tcg_const_i32(words);
244 gen_helper_sve_predtest(t, dptr, gptr, t);
245 tcg_temp_free_ptr(dptr);
246 tcg_temp_free_ptr(gptr);
248 do_pred_flags(t);
249 tcg_temp_free_i32(t);
252 /* For each element size, the bits within a predicate word that are active. */
253 const uint64_t pred_esz_masks[4] = {
254 0xffffffffffffffffull, 0x5555555555555555ull,
255 0x1111111111111111ull, 0x0101010101010101ull
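/* E.g. (illustrative): each predicate bit governs one byte of the vector,
 * so for esz == 1 (halfwords) only every other bit is significant
 * (0x5555...), for esz == 2 every fourth bit (0x1111...), and for
 * esz == 3 every eighth bit (0x0101...).
 */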
259 *** SVE Logical - Unpredicated Group
262 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
264 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
267 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
269 if (a->rn == a->rm) { /* MOV */
270 return do_mov_z(s, a->rd, a->rn);
271 } else {
272 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
276 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
278 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
281 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
283 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
287 *** SVE Integer Arithmetic - Unpredicated Group
290 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
292 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
295 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
297 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
300 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
302 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
305 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
307 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
310 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
312 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
315 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
317 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
321 *** SVE Integer Arithmetic - Binary Predicated Group
324 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
326 unsigned vsz = vec_full_reg_size(s);
327 if (fn == NULL) {
328 return false;
330 if (sve_access_check(s)) {
331 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
332 vec_full_reg_offset(s, a->rn),
333 vec_full_reg_offset(s, a->rm),
334 pred_full_reg_offset(s, a->pg),
335 vsz, vsz, 0, fn);
337 return true;
340 #define DO_ZPZZ(NAME, name) \
341 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
342 uint32_t insn) \
344 static gen_helper_gvec_4 * const fns[4] = { \
345 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
346 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
347 }; \
348 return do_zpzz_ool(s, a, fns[a->esz]); \
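/* For reference, an illustrative expansion of DO_ZPZZ(ADD, add):
 *
 *   static bool trans_ADD_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 *   {
 *       static gen_helper_gvec_4 * const fns[4] = {
 *           gen_helper_sve_add_zpzz_b, gen_helper_sve_add_zpzz_h,
 *           gen_helper_sve_add_zpzz_s, gen_helper_sve_add_zpzz_d,
 *       };
 *       return do_zpzz_ool(s, a, fns[a->esz]);
 *   }
 */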
351 DO_ZPZZ(AND, and)
352 DO_ZPZZ(EOR, eor)
353 DO_ZPZZ(ORR, orr)
354 DO_ZPZZ(BIC, bic)
356 DO_ZPZZ(ADD, add)
357 DO_ZPZZ(SUB, sub)
359 DO_ZPZZ(SMAX, smax)
360 DO_ZPZZ(UMAX, umax)
361 DO_ZPZZ(SMIN, smin)
362 DO_ZPZZ(UMIN, umin)
363 DO_ZPZZ(SABD, sabd)
364 DO_ZPZZ(UABD, uabd)
366 DO_ZPZZ(MUL, mul)
367 DO_ZPZZ(SMULH, smulh)
368 DO_ZPZZ(UMULH, umulh)
370 DO_ZPZZ(ASR, asr)
371 DO_ZPZZ(LSR, lsr)
372 DO_ZPZZ(LSL, lsl)
374 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
376 static gen_helper_gvec_4 * const fns[4] = {
377 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
379 return do_zpzz_ool(s, a, fns[a->esz]);
382 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
384 static gen_helper_gvec_4 * const fns[4] = {
385 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
387 return do_zpzz_ool(s, a, fns[a->esz]);
390 DO_ZPZZ(SEL, sel)
392 #undef DO_ZPZZ
395 *** SVE Integer Arithmetic - Unary Predicated Group
398 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
400 if (fn == NULL) {
401 return false;
403 if (sve_access_check(s)) {
404 unsigned vsz = vec_full_reg_size(s);
405 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
406 vec_full_reg_offset(s, a->rn),
407 pred_full_reg_offset(s, a->pg),
408 vsz, vsz, 0, fn);
410 return true;
413 #define DO_ZPZ(NAME, name) \
414 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
416 static gen_helper_gvec_3 * const fns[4] = { \
417 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
418 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
419 }; \
420 return do_zpz_ool(s, a, fns[a->esz]); \
423 DO_ZPZ(CLS, cls)
424 DO_ZPZ(CLZ, clz)
425 DO_ZPZ(CNT_zpz, cnt_zpz)
426 DO_ZPZ(CNOT, cnot)
427 DO_ZPZ(NOT_zpz, not_zpz)
428 DO_ZPZ(ABS, abs)
429 DO_ZPZ(NEG, neg)
431 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
433 static gen_helper_gvec_3 * const fns[4] = {
434 NULL,
435 gen_helper_sve_fabs_h,
436 gen_helper_sve_fabs_s,
437 gen_helper_sve_fabs_d
439 return do_zpz_ool(s, a, fns[a->esz]);
442 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
444 static gen_helper_gvec_3 * const fns[4] = {
445 NULL,
446 gen_helper_sve_fneg_h,
447 gen_helper_sve_fneg_s,
448 gen_helper_sve_fneg_d
450 return do_zpz_ool(s, a, fns[a->esz]);
453 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
455 static gen_helper_gvec_3 * const fns[4] = {
456 NULL,
457 gen_helper_sve_sxtb_h,
458 gen_helper_sve_sxtb_s,
459 gen_helper_sve_sxtb_d
461 return do_zpz_ool(s, a, fns[a->esz]);
464 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
466 static gen_helper_gvec_3 * const fns[4] = {
467 NULL,
468 gen_helper_sve_uxtb_h,
469 gen_helper_sve_uxtb_s,
470 gen_helper_sve_uxtb_d
472 return do_zpz_ool(s, a, fns[a->esz]);
475 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
477 static gen_helper_gvec_3 * const fns[4] = {
478 NULL, NULL,
479 gen_helper_sve_sxth_s,
480 gen_helper_sve_sxth_d
482 return do_zpz_ool(s, a, fns[a->esz]);
485 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
487 static gen_helper_gvec_3 * const fns[4] = {
488 NULL, NULL,
489 gen_helper_sve_uxth_s,
490 gen_helper_sve_uxth_d
492 return do_zpz_ool(s, a, fns[a->esz]);
495 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
497 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
500 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
502 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
505 #undef DO_ZPZ
508 *** SVE Integer Reduction Group
511 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
512 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
513 gen_helper_gvec_reduc *fn)
515 unsigned vsz = vec_full_reg_size(s);
516 TCGv_ptr t_zn, t_pg;
517 TCGv_i32 desc;
518 TCGv_i64 temp;
520 if (fn == NULL) {
521 return false;
523 if (!sve_access_check(s)) {
524 return true;
527 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
528 temp = tcg_temp_new_i64();
529 t_zn = tcg_temp_new_ptr();
530 t_pg = tcg_temp_new_ptr();
532 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
533 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
534 fn(temp, t_zn, t_pg, desc);
535 tcg_temp_free_ptr(t_zn);
536 tcg_temp_free_ptr(t_pg);
537 tcg_temp_free_i32(desc);
539 write_fp_dreg(s, a->rd, temp);
540 tcg_temp_free_i64(temp);
541 return true;
544 #define DO_VPZ(NAME, name) \
545 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
547 static gen_helper_gvec_reduc * const fns[4] = { \
548 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
549 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
550 }; \
551 return do_vpz_ool(s, a, fns[a->esz]); \
554 DO_VPZ(ORV, orv)
555 DO_VPZ(ANDV, andv)
556 DO_VPZ(EORV, eorv)
558 DO_VPZ(UADDV, uaddv)
559 DO_VPZ(SMAXV, smaxv)
560 DO_VPZ(UMAXV, umaxv)
561 DO_VPZ(SMINV, sminv)
562 DO_VPZ(UMINV, uminv)
564 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
566 static gen_helper_gvec_reduc * const fns[4] = {
567 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
568 gen_helper_sve_saddv_s, NULL
570 return do_vpz_ool(s, a, fns[a->esz]);
573 #undef DO_VPZ
576 *** SVE Shift by Immediate - Predicated Group
579 /* Store zero into every active element of Zd. We will use this for two
580 * and three-operand predicated instructions for which logic dictates a
581 * zero result.
583 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
585 static gen_helper_gvec_2 * const fns[4] = {
586 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
587 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
589 if (sve_access_check(s)) {
590 unsigned vsz = vec_full_reg_size(s);
591 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
592 pred_full_reg_offset(s, pg),
593 vsz, vsz, 0, fns[esz]);
595 return true;
598 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
599 gen_helper_gvec_3 *fn)
601 if (sve_access_check(s)) {
602 unsigned vsz = vec_full_reg_size(s);
603 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
604 vec_full_reg_offset(s, a->rn),
605 pred_full_reg_offset(s, a->pg),
606 vsz, vsz, a->imm, fn);
608 return true;
611 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
613 static gen_helper_gvec_3 * const fns[4] = {
614 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
615 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
617 if (a->esz < 0) {
618 /* Invalid tsz encoding -- see tszimm_esz. */
619 return false;
621 /* Shift by element size is architecturally valid. For
622 arithmetic right-shift, it's the same as by one less. */
623 a->imm = MIN(a->imm, (8 << a->esz) - 1);
624 return do_zpzi_ool(s, a, fns[a->esz]);
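/* E.g. for byte elements an encoded shift of 8 is clamped to 7 above;
 * an arithmetic right shift by 7 already fills the byte with copies of
 * the sign bit, so the result matches the architectural shift by 8.
 */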
627 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
629 static gen_helper_gvec_3 * const fns[4] = {
630 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
631 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
633 if (a->esz < 0) {
634 return false;
636 /* Shift by element size is architecturally valid.
637 For logical shifts, it is a zeroing operation. */
638 if (a->imm >= (8 << a->esz)) {
639 return do_clr_zp(s, a->rd, a->pg, a->esz);
640 } else {
641 return do_zpzi_ool(s, a, fns[a->esz]);
645 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
647 static gen_helper_gvec_3 * const fns[4] = {
648 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
649 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
651 if (a->esz < 0) {
652 return false;
654 /* Shift by element size is architecturally valid.
655 For logical shifts, it is a zeroing operation. */
656 if (a->imm >= (8 << a->esz)) {
657 return do_clr_zp(s, a->rd, a->pg, a->esz);
658 } else {
659 return do_zpzi_ool(s, a, fns[a->esz]);
663 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
665 static gen_helper_gvec_3 * const fns[4] = {
666 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
667 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
669 if (a->esz < 0) {
670 return false;
672 /* Shift by element size is architecturally valid. For arithmetic
673 right shift for division, it is a zeroing operation. */
674 if (a->imm >= (8 << a->esz)) {
675 return do_clr_zp(s, a->rd, a->pg, a->esz);
676 } else {
677 return do_zpzi_ool(s, a, fns[a->esz]);
682 *** SVE Bitwise Shift - Predicated Group
685 #define DO_ZPZW(NAME, name) \
686 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
687 uint32_t insn) \
689 static gen_helper_gvec_4 * const fns[3] = { \
690 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
691 gen_helper_sve_##name##_zpzw_s, \
692 }; \
693 if (a->esz < 0 || a->esz >= 3) { \
694 return false; \
696 return do_zpzz_ool(s, a, fns[a->esz]); \
699 DO_ZPZW(ASR, asr)
700 DO_ZPZW(LSR, lsr)
701 DO_ZPZW(LSL, lsl)
703 #undef DO_ZPZW
706 *** SVE Bitwise Shift - Unpredicated Group
709 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
710 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
711 int64_t, uint32_t, uint32_t))
713 if (a->esz < 0) {
714 /* Invalid tsz encoding -- see tszimm_esz. */
715 return false;
717 if (sve_access_check(s)) {
718 unsigned vsz = vec_full_reg_size(s);
719 /* Shift by element size is architecturally valid. For
720 arithmetic right-shift, it's the same as by one less.
721 Otherwise it is a zeroing operation. */
722 if (a->imm >= 8 << a->esz) {
723 if (asr) {
724 a->imm = (8 << a->esz) - 1;
725 } else {
726 do_dupi_z(s, a->rd, 0);
727 return true;
730 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
731 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
733 return true;
736 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
738 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
741 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
743 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
746 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
748 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
751 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
753 if (fn == NULL) {
754 return false;
756 if (sve_access_check(s)) {
757 unsigned vsz = vec_full_reg_size(s);
758 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
759 vec_full_reg_offset(s, a->rn),
760 vec_full_reg_offset(s, a->rm),
761 vsz, vsz, 0, fn);
763 return true;
766 #define DO_ZZW(NAME, name) \
767 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
768 uint32_t insn) \
770 static gen_helper_gvec_3 * const fns[4] = { \
771 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
772 gen_helper_sve_##name##_zzw_s, NULL \
773 }; \
774 return do_zzw_ool(s, a, fns[a->esz]); \
777 DO_ZZW(ASR, asr)
778 DO_ZZW(LSR, lsr)
779 DO_ZZW(LSL, lsl)
781 #undef DO_ZZW
784 *** SVE Integer Multiply-Add Group
787 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
788 gen_helper_gvec_5 *fn)
790 if (sve_access_check(s)) {
791 unsigned vsz = vec_full_reg_size(s);
792 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
793 vec_full_reg_offset(s, a->ra),
794 vec_full_reg_offset(s, a->rn),
795 vec_full_reg_offset(s, a->rm),
796 pred_full_reg_offset(s, a->pg),
797 vsz, vsz, 0, fn);
799 return true;
802 #define DO_ZPZZZ(NAME, name) \
803 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
805 static gen_helper_gvec_5 * const fns[4] = { \
806 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
807 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
808 }; \
809 return do_zpzzz_ool(s, a, fns[a->esz]); \
812 DO_ZPZZZ(MLA, mla)
813 DO_ZPZZZ(MLS, mls)
815 #undef DO_ZPZZZ
818 *** SVE Index Generation Group
821 static void do_index(DisasContext *s, int esz, int rd,
822 TCGv_i64 start, TCGv_i64 incr)
824 unsigned vsz = vec_full_reg_size(s);
825 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
826 TCGv_ptr t_zd = tcg_temp_new_ptr();
828 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
829 if (esz == 3) {
830 gen_helper_sve_index_d(t_zd, start, incr, desc);
831 } else {
832 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
833 static index_fn * const fns[3] = {
834 gen_helper_sve_index_b,
835 gen_helper_sve_index_h,
836 gen_helper_sve_index_s,
838 TCGv_i32 s32 = tcg_temp_new_i32();
839 TCGv_i32 i32 = tcg_temp_new_i32();
841 tcg_gen_extrl_i64_i32(s32, start);
842 tcg_gen_extrl_i64_i32(i32, incr);
843 fns[esz](t_zd, s32, i32, desc);
845 tcg_temp_free_i32(s32);
846 tcg_temp_free_i32(i32);
848 tcg_temp_free_ptr(t_zd);
849 tcg_temp_free_i32(desc);
852 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
854 if (sve_access_check(s)) {
855 TCGv_i64 start = tcg_const_i64(a->imm1);
856 TCGv_i64 incr = tcg_const_i64(a->imm2);
857 do_index(s, a->esz, a->rd, start, incr);
858 tcg_temp_free_i64(start);
859 tcg_temp_free_i64(incr);
861 return true;
864 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
866 if (sve_access_check(s)) {
867 TCGv_i64 start = tcg_const_i64(a->imm);
868 TCGv_i64 incr = cpu_reg(s, a->rm);
869 do_index(s, a->esz, a->rd, start, incr);
870 tcg_temp_free_i64(start);
872 return true;
875 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
877 if (sve_access_check(s)) {
878 TCGv_i64 start = cpu_reg(s, a->rn);
879 TCGv_i64 incr = tcg_const_i64(a->imm);
880 do_index(s, a->esz, a->rd, start, incr);
881 tcg_temp_free_i64(incr);
883 return true;
886 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
888 if (sve_access_check(s)) {
889 TCGv_i64 start = cpu_reg(s, a->rn);
890 TCGv_i64 incr = cpu_reg(s, a->rm);
891 do_index(s, a->esz, a->rd, start, incr);
893 return true;
897 *** SVE Stack Allocation Group
900 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
902 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
903 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
904 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
905 return true;
908 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
910 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
911 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
912 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
913 return true;
916 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
918 TCGv_i64 reg = cpu_reg(s, a->rd);
919 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
920 return true;
924 *** SVE Compute Vector Address Group
927 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
929 if (sve_access_check(s)) {
930 unsigned vsz = vec_full_reg_size(s);
931 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
932 vec_full_reg_offset(s, a->rn),
933 vec_full_reg_offset(s, a->rm),
934 vsz, vsz, a->imm, fn);
936 return true;
939 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
941 return do_adr(s, a, gen_helper_sve_adr_p32);
944 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
946 return do_adr(s, a, gen_helper_sve_adr_p64);
949 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
951 return do_adr(s, a, gen_helper_sve_adr_s32);
954 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
956 return do_adr(s, a, gen_helper_sve_adr_u32);
960 *** SVE Integer Misc - Unpredicated Group
963 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
965 static gen_helper_gvec_2 * const fns[4] = {
966 NULL,
967 gen_helper_sve_fexpa_h,
968 gen_helper_sve_fexpa_s,
969 gen_helper_sve_fexpa_d,
971 if (a->esz == 0) {
972 return false;
974 if (sve_access_check(s)) {
975 unsigned vsz = vec_full_reg_size(s);
976 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
977 vec_full_reg_offset(s, a->rn),
978 vsz, vsz, 0, fns[a->esz]);
980 return true;
983 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
985 static gen_helper_gvec_3 * const fns[4] = {
986 NULL,
987 gen_helper_sve_ftssel_h,
988 gen_helper_sve_ftssel_s,
989 gen_helper_sve_ftssel_d,
991 if (a->esz == 0) {
992 return false;
994 if (sve_access_check(s)) {
995 unsigned vsz = vec_full_reg_size(s);
996 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
997 vec_full_reg_offset(s, a->rn),
998 vec_full_reg_offset(s, a->rm),
999 vsz, vsz, 0, fns[a->esz]);
1001 return true;
1005 *** SVE Predicate Logical Operations Group
1008 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1009 const GVecGen4 *gvec_op)
1011 if (!sve_access_check(s)) {
1012 return true;
1015 unsigned psz = pred_gvec_reg_size(s);
1016 int dofs = pred_full_reg_offset(s, a->rd);
1017 int nofs = pred_full_reg_offset(s, a->rn);
1018 int mofs = pred_full_reg_offset(s, a->rm);
1019 int gofs = pred_full_reg_offset(s, a->pg);
1021 if (psz == 8) {
1022 /* Do the operation and the flags generation in temps. */
1023 TCGv_i64 pd = tcg_temp_new_i64();
1024 TCGv_i64 pn = tcg_temp_new_i64();
1025 TCGv_i64 pm = tcg_temp_new_i64();
1026 TCGv_i64 pg = tcg_temp_new_i64();
1028 tcg_gen_ld_i64(pn, cpu_env, nofs);
1029 tcg_gen_ld_i64(pm, cpu_env, mofs);
1030 tcg_gen_ld_i64(pg, cpu_env, gofs);
1032 gvec_op->fni8(pd, pn, pm, pg);
1033 tcg_gen_st_i64(pd, cpu_env, dofs);
1035 do_predtest1(pd, pg);
1037 tcg_temp_free_i64(pd);
1038 tcg_temp_free_i64(pn);
1039 tcg_temp_free_i64(pm);
1040 tcg_temp_free_i64(pg);
1041 } else {
1042 /* The operation and flags generation is large. The computation
1043 * of the flags depends on the original contents of the guarding
1044 * predicate. If the destination overwrites the guarding predicate,
1045 * then the easiest way to get this right is to save a copy.
1047 int tofs = gofs;
1048 if (a->rd == a->pg) {
1049 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1050 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1053 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1054 do_predtest(s, dofs, tofs, psz / 8);
1056 return true;
1059 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1061 tcg_gen_and_i64(pd, pn, pm);
1062 tcg_gen_and_i64(pd, pd, pg);
1065 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1066 TCGv_vec pm, TCGv_vec pg)
1068 tcg_gen_and_vec(vece, pd, pn, pm);
1069 tcg_gen_and_vec(vece, pd, pd, pg);
1072 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1074 static const GVecGen4 op = {
1075 .fni8 = gen_and_pg_i64,
1076 .fniv = gen_and_pg_vec,
1077 .fno = gen_helper_sve_and_pppp,
1078 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1080 if (a->s) {
1081 return do_pppp_flags(s, a, &op);
1082 } else if (a->rn == a->rm) {
1083 if (a->pg == a->rn) {
1084 return do_mov_p(s, a->rd, a->rn);
1085 } else {
1086 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1088 } else if (a->pg == a->rn || a->pg == a->rm) {
1089 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1090 } else {
1091 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1095 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1097 tcg_gen_andc_i64(pd, pn, pm);
1098 tcg_gen_and_i64(pd, pd, pg);
1101 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1102 TCGv_vec pm, TCGv_vec pg)
1104 tcg_gen_andc_vec(vece, pd, pn, pm);
1105 tcg_gen_and_vec(vece, pd, pd, pg);
1108 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1110 static const GVecGen4 op = {
1111 .fni8 = gen_bic_pg_i64,
1112 .fniv = gen_bic_pg_vec,
1113 .fno = gen_helper_sve_bic_pppp,
1114 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1116 if (a->s) {
1117 return do_pppp_flags(s, a, &op);
1118 } else if (a->pg == a->rn) {
1119 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1120 } else {
1121 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1125 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1127 tcg_gen_xor_i64(pd, pn, pm);
1128 tcg_gen_and_i64(pd, pd, pg);
1131 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1132 TCGv_vec pm, TCGv_vec pg)
1134 tcg_gen_xor_vec(vece, pd, pn, pm);
1135 tcg_gen_and_vec(vece, pd, pd, pg);
1138 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1140 static const GVecGen4 op = {
1141 .fni8 = gen_eor_pg_i64,
1142 .fniv = gen_eor_pg_vec,
1143 .fno = gen_helper_sve_eor_pppp,
1144 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1146 if (a->s) {
1147 return do_pppp_flags(s, a, &op);
1148 } else {
1149 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1153 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1155 tcg_gen_and_i64(pn, pn, pg);
1156 tcg_gen_andc_i64(pm, pm, pg);
1157 tcg_gen_or_i64(pd, pn, pm);
1160 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1161 TCGv_vec pm, TCGv_vec pg)
1163 tcg_gen_and_vec(vece, pn, pn, pg);
1164 tcg_gen_andc_vec(vece, pm, pm, pg);
1165 tcg_gen_or_vec(vece, pd, pn, pm);
1168 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1170 static const GVecGen4 op = {
1171 .fni8 = gen_sel_pg_i64,
1172 .fniv = gen_sel_pg_vec,
1173 .fno = gen_helper_sve_sel_pppp,
1174 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1176 if (a->s) {
1177 return false;
1178 } else {
1179 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1183 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1185 tcg_gen_or_i64(pd, pn, pm);
1186 tcg_gen_and_i64(pd, pd, pg);
1189 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1190 TCGv_vec pm, TCGv_vec pg)
1192 tcg_gen_or_vec(vece, pd, pn, pm);
1193 tcg_gen_and_vec(vece, pd, pd, pg);
1196 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1198 static const GVecGen4 op = {
1199 .fni8 = gen_orr_pg_i64,
1200 .fniv = gen_orr_pg_vec,
1201 .fno = gen_helper_sve_orr_pppp,
1202 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1204 if (a->s) {
1205 return do_pppp_flags(s, a, &op);
1206 } else if (a->pg == a->rn && a->rn == a->rm) {
1207 return do_mov_p(s, a->rd, a->rn);
1208 } else {
1209 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1213 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1215 tcg_gen_orc_i64(pd, pn, pm);
1216 tcg_gen_and_i64(pd, pd, pg);
1219 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1220 TCGv_vec pm, TCGv_vec pg)
1222 tcg_gen_orc_vec(vece, pd, pn, pm);
1223 tcg_gen_and_vec(vece, pd, pd, pg);
1226 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1228 static const GVecGen4 op = {
1229 .fni8 = gen_orn_pg_i64,
1230 .fniv = gen_orn_pg_vec,
1231 .fno = gen_helper_sve_orn_pppp,
1232 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1234 if (a->s) {
1235 return do_pppp_flags(s, a, &op);
1236 } else {
1237 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1241 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1243 tcg_gen_or_i64(pd, pn, pm);
1244 tcg_gen_andc_i64(pd, pg, pd);
1247 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1248 TCGv_vec pm, TCGv_vec pg)
1250 tcg_gen_or_vec(vece, pd, pn, pm);
1251 tcg_gen_andc_vec(vece, pd, pg, pd);
1254 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1256 static const GVecGen4 op = {
1257 .fni8 = gen_nor_pg_i64,
1258 .fniv = gen_nor_pg_vec,
1259 .fno = gen_helper_sve_nor_pppp,
1260 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1262 if (a->s) {
1263 return do_pppp_flags(s, a, &op);
1264 } else {
1265 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1269 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1271 tcg_gen_and_i64(pd, pn, pm);
1272 tcg_gen_andc_i64(pd, pg, pd);
1275 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1276 TCGv_vec pm, TCGv_vec pg)
1278 tcg_gen_and_vec(vece, pd, pn, pm);
1279 tcg_gen_andc_vec(vece, pd, pg, pd);
1282 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1284 static const GVecGen4 op = {
1285 .fni8 = gen_nand_pg_i64,
1286 .fniv = gen_nand_pg_vec,
1287 .fno = gen_helper_sve_nand_pppp,
1288 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1290 if (a->s) {
1291 return do_pppp_flags(s, a, &op);
1292 } else {
1293 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1298 *** SVE Predicate Misc Group
1301 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1303 if (sve_access_check(s)) {
1304 int nofs = pred_full_reg_offset(s, a->rn);
1305 int gofs = pred_full_reg_offset(s, a->pg);
1306 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1308 if (words == 1) {
1309 TCGv_i64 pn = tcg_temp_new_i64();
1310 TCGv_i64 pg = tcg_temp_new_i64();
1312 tcg_gen_ld_i64(pn, cpu_env, nofs);
1313 tcg_gen_ld_i64(pg, cpu_env, gofs);
1314 do_predtest1(pn, pg);
1316 tcg_temp_free_i64(pn);
1317 tcg_temp_free_i64(pg);
1318 } else {
1319 do_predtest(s, nofs, gofs, words);
1322 return true;
1325 /* See the ARM pseudocode DecodePredCount. */
1326 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1328 unsigned elements = fullsz >> esz;
1329 unsigned bound;
1331 switch (pattern) {
1332 case 0x0: /* POW2 */
1333 return pow2floor(elements);
1334 case 0x1: /* VL1 */
1335 case 0x2: /* VL2 */
1336 case 0x3: /* VL3 */
1337 case 0x4: /* VL4 */
1338 case 0x5: /* VL5 */
1339 case 0x6: /* VL6 */
1340 case 0x7: /* VL7 */
1341 case 0x8: /* VL8 */
1342 bound = pattern;
1343 break;
1344 case 0x9: /* VL16 */
1345 case 0xa: /* VL32 */
1346 case 0xb: /* VL64 */
1347 case 0xc: /* VL128 */
1348 case 0xd: /* VL256 */
1349 bound = 16 << (pattern - 9);
1350 break;
1351 case 0x1d: /* MUL4 */
1352 return elements - elements % 4;
1353 case 0x1e: /* MUL3 */
1354 return elements - elements % 3;
1355 case 0x1f: /* ALL */
1356 return elements;
1357 default: /* #uimm5 */
1358 return 0;
1360 return elements >= bound ? bound : 0;
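/* Worked example (illustrative): with a 256-bit vector and esz == 2
 * (words) there are 8 elements.  POW2 yields pow2floor(8) = 8, VL5
 * yields 5, VL16 yields 0 because the bound 16 exceeds 8, MUL3 yields
 * 6, ALL yields 8, and any unallocated #uimm5 pattern yields 0.
 */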
1363 /* This handles all of the predicate initialization instructions,
1364 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1365 * so that decode_pred_count returns 0. For SETFFR, we will have
1366 * set RD == 16 == FFR.
1368 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1370 if (!sve_access_check(s)) {
1371 return true;
1374 unsigned fullsz = vec_full_reg_size(s);
1375 unsigned ofs = pred_full_reg_offset(s, rd);
1376 unsigned numelem, setsz, i;
1377 uint64_t word, lastword;
1378 TCGv_i64 t;
1380 numelem = decode_pred_count(fullsz, pat, esz);
1382 /* Determine what we must store into each bit, and how many. */
1383 if (numelem == 0) {
1384 lastword = word = 0;
1385 setsz = fullsz;
1386 } else {
1387 setsz = numelem << esz;
1388 lastword = word = pred_esz_masks[esz];
1389 if (setsz % 64) {
1390 lastword &= ~(-1ull << (setsz % 64));
1394 t = tcg_temp_new_i64();
1395 if (fullsz <= 64) {
1396 tcg_gen_movi_i64(t, lastword);
1397 tcg_gen_st_i64(t, cpu_env, ofs);
1398 goto done;
1401 if (word == lastword) {
1402 unsigned maxsz = size_for_gvec(fullsz / 8);
1403 unsigned oprsz = size_for_gvec(setsz / 8);
1405 if (oprsz * 8 == setsz) {
1406 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1407 goto done;
1409 if (oprsz * 8 == setsz + 8) {
1410 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1411 tcg_gen_movi_i64(t, 0);
1412 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1413 goto done;
1417 setsz /= 8;
1418 fullsz /= 8;
1420 tcg_gen_movi_i64(t, word);
1421 for (i = 0; i < setsz; i += 8) {
1422 tcg_gen_st_i64(t, cpu_env, ofs + i);
1424 if (lastword != word) {
1425 tcg_gen_movi_i64(t, lastword);
1426 tcg_gen_st_i64(t, cpu_env, ofs + i);
1427 i += 8;
1429 if (i < fullsz) {
1430 tcg_gen_movi_i64(t, 0);
1431 for (; i < fullsz; i += 8) {
1432 tcg_gen_st_i64(t, cpu_env, ofs + i);
1436 done:
1437 tcg_temp_free_i64(t);
1439 /* PTRUES */
1440 if (setflag) {
1441 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1442 tcg_gen_movi_i32(cpu_CF, word == 0);
1443 tcg_gen_movi_i32(cpu_VF, 0);
1444 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1446 return true;
1449 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1451 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1454 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1456 /* Note pat == 31 is #all, to set all elements. */
1457 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1460 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1462 /* Note pat == 32 is #unimp, to set no elements. */
1463 return do_predset(s, 0, a->rd, 32, false);
1466 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1468 /* The path through do_pppp_flags is complicated enough to want to avoid
1469 * duplication. Frob the arguments into the form of a predicated AND.
1471 arg_rprr_s alt_a = {
1472 .rd = a->rd, .pg = a->pg, .s = a->s,
1473 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1475 return trans_AND_pppp(s, &alt_a, insn);
1478 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1480 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1483 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1485 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1488 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1489 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1490 TCGv_ptr, TCGv_i32))
1492 if (!sve_access_check(s)) {
1493 return true;
1496 TCGv_ptr t_pd = tcg_temp_new_ptr();
1497 TCGv_ptr t_pg = tcg_temp_new_ptr();
1498 TCGv_i32 t;
1499 unsigned desc;
1501 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1502 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1504 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1505 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1506 t = tcg_const_i32(desc);
1508 gen_fn(t, t_pd, t_pg, t);
1509 tcg_temp_free_ptr(t_pd);
1510 tcg_temp_free_ptr(t_pg);
1512 do_pred_flags(t);
1513 tcg_temp_free_i32(t);
1514 return true;
1517 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1519 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1522 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1524 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1528 *** SVE Element Count Group
1531 /* Perform an inline saturating addition of a 32-bit value within
1532 * a 64-bit register. The second operand is known to be positive,
1533 * which halves the comparisons we must perform to bound the result.
1535 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1537 int64_t ibound;
1538 TCGv_i64 bound;
1539 TCGCond cond;
1541 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1542 if (u) {
1543 tcg_gen_ext32u_i64(reg, reg);
1544 } else {
1545 tcg_gen_ext32s_i64(reg, reg);
1547 if (d) {
1548 tcg_gen_sub_i64(reg, reg, val);
1549 ibound = (u ? 0 : INT32_MIN);
1550 cond = TCG_COND_LT;
1551 } else {
1552 tcg_gen_add_i64(reg, reg, val);
1553 ibound = (u ? UINT32_MAX : INT32_MAX);
1554 cond = TCG_COND_GT;
1556 bound = tcg_const_i64(ibound);
1557 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1558 tcg_temp_free_i64(bound);
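/* E.g. (illustrative): an unsigned decrement of reg = 5 by val = 10
 * yields -5 in the 64-bit temporary, which is below the bound of 0, so
 * the movcond clamps the result to 0; likewise an unsigned increment
 * past UINT32_MAX is clamped to UINT32_MAX.
 */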
1561 /* Similarly with 64-bit values. */
1562 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1564 TCGv_i64 t0 = tcg_temp_new_i64();
1565 TCGv_i64 t1 = tcg_temp_new_i64();
1566 TCGv_i64 t2;
1568 if (u) {
1569 if (d) {
1570 tcg_gen_sub_i64(t0, reg, val);
1571 tcg_gen_movi_i64(t1, 0);
1572 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1573 } else {
1574 tcg_gen_add_i64(t0, reg, val);
1575 tcg_gen_movi_i64(t1, -1);
1576 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1578 } else {
1579 if (d) {
1580 /* Detect signed overflow for subtraction. */
1581 tcg_gen_xor_i64(t0, reg, val);
1582 tcg_gen_sub_i64(t1, reg, val);
1583 tcg_gen_xor_i64(reg, reg, t1);
1584 tcg_gen_and_i64(t0, t0, reg);
1586 /* Bound the result. */
1587 tcg_gen_movi_i64(reg, INT64_MIN);
1588 t2 = tcg_const_i64(0);
1589 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1590 } else {
1591 /* Detect signed overflow for addition. */
1592 tcg_gen_xor_i64(t0, reg, val);
1593 tcg_gen_add_i64(reg, reg, val);
1594 tcg_gen_xor_i64(t1, reg, val);
1595 tcg_gen_andc_i64(t0, t1, t0);
1597 /* Bound the result. */
1598 tcg_gen_movi_i64(t1, INT64_MAX);
1599 t2 = tcg_const_i64(0);
1600 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1602 tcg_temp_free_i64(t2);
1604 tcg_temp_free_i64(t0);
1605 tcg_temp_free_i64(t1);
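/* The xor/andc sequences above are the usual two's-complement overflow
 * tests (illustrative summary): for the addition case, overflow occurred
 * iff reg and val had the same sign and the sum's sign differs, i.e. the
 * sign bit of (sum ^ val) & ~(reg ^ val) is set.  Since val is known to
 * be positive (see the comment above do_sat_addsub_32), an add can only
 * overflow toward INT64_MAX and a subtract only toward INT64_MIN, which
 * is why each movcond needs just one bound.
 */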
1608 /* Similarly with a vector and a scalar operand. */
1609 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1610 TCGv_i64 val, bool u, bool d)
1612 unsigned vsz = vec_full_reg_size(s);
1613 TCGv_ptr dptr, nptr;
1614 TCGv_i32 t32, desc;
1615 TCGv_i64 t64;
1617 dptr = tcg_temp_new_ptr();
1618 nptr = tcg_temp_new_ptr();
1619 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1620 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1621 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1623 switch (esz) {
1624 case MO_8:
1625 t32 = tcg_temp_new_i32();
1626 tcg_gen_extrl_i64_i32(t32, val);
1627 if (d) {
1628 tcg_gen_neg_i32(t32, t32);
1630 if (u) {
1631 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1632 } else {
1633 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1635 tcg_temp_free_i32(t32);
1636 break;
1638 case MO_16:
1639 t32 = tcg_temp_new_i32();
1640 tcg_gen_extrl_i64_i32(t32, val);
1641 if (d) {
1642 tcg_gen_neg_i32(t32, t32);
1644 if (u) {
1645 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1646 } else {
1647 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1649 tcg_temp_free_i32(t32);
1650 break;
1652 case MO_32:
1653 t64 = tcg_temp_new_i64();
1654 if (d) {
1655 tcg_gen_neg_i64(t64, val);
1656 } else {
1657 tcg_gen_mov_i64(t64, val);
1659 if (u) {
1660 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1661 } else {
1662 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1664 tcg_temp_free_i64(t64);
1665 break;
1667 case MO_64:
1668 if (u) {
1669 if (d) {
1670 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1671 } else {
1672 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1674 } else if (d) {
1675 t64 = tcg_temp_new_i64();
1676 tcg_gen_neg_i64(t64, val);
1677 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1678 tcg_temp_free_i64(t64);
1679 } else {
1680 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1682 break;
1684 default:
1685 g_assert_not_reached();
1688 tcg_temp_free_ptr(dptr);
1689 tcg_temp_free_ptr(nptr);
1690 tcg_temp_free_i32(desc);
1693 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1695 if (sve_access_check(s)) {
1696 unsigned fullsz = vec_full_reg_size(s);
1697 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1698 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1700 return true;
1703 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1705 if (sve_access_check(s)) {
1706 unsigned fullsz = vec_full_reg_size(s);
1707 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1708 int inc = numelem * a->imm * (a->d ? -1 : 1);
1709 TCGv_i64 reg = cpu_reg(s, a->rd);
1711 tcg_gen_addi_i64(reg, reg, inc);
1713 return true;
1716 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1717 uint32_t insn)
1719 if (!sve_access_check(s)) {
1720 return true;
1723 unsigned fullsz = vec_full_reg_size(s);
1724 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1725 int inc = numelem * a->imm;
1726 TCGv_i64 reg = cpu_reg(s, a->rd);
1728 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1729 if (inc == 0) {
1730 if (a->u) {
1731 tcg_gen_ext32u_i64(reg, reg);
1732 } else {
1733 tcg_gen_ext32s_i64(reg, reg);
1735 } else {
1736 TCGv_i64 t = tcg_const_i64(inc);
1737 do_sat_addsub_32(reg, t, a->u, a->d);
1738 tcg_temp_free_i64(t);
1740 return true;
1743 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1744 uint32_t insn)
1746 if (!sve_access_check(s)) {
1747 return true;
1750 unsigned fullsz = vec_full_reg_size(s);
1751 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752 int inc = numelem * a->imm;
1753 TCGv_i64 reg = cpu_reg(s, a->rd);
1755 if (inc != 0) {
1756 TCGv_i64 t = tcg_const_i64(inc);
1757 do_sat_addsub_64(reg, t, a->u, a->d);
1758 tcg_temp_free_i64(t);
1760 return true;
1763 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1765 if (a->esz == 0) {
1766 return false;
1769 unsigned fullsz = vec_full_reg_size(s);
1770 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1771 int inc = numelem * a->imm;
1773 if (inc != 0) {
1774 if (sve_access_check(s)) {
1775 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1776 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1777 vec_full_reg_offset(s, a->rn),
1778 t, fullsz, fullsz);
1779 tcg_temp_free_i64(t);
1781 } else {
1782 do_mov_z(s, a->rd, a->rn);
1784 return true;
1787 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1788 uint32_t insn)
1790 if (a->esz == 0) {
1791 return false;
1794 unsigned fullsz = vec_full_reg_size(s);
1795 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1796 int inc = numelem * a->imm;
1798 if (inc != 0) {
1799 if (sve_access_check(s)) {
1800 TCGv_i64 t = tcg_const_i64(inc);
1801 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1802 tcg_temp_free_i64(t);
1804 } else {
1805 do_mov_z(s, a->rd, a->rn);
1807 return true;
1811 *** SVE Bitwise Immediate Group
1814 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1816 uint64_t imm;
1817 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1818 extract32(a->dbm, 0, 6),
1819 extract32(a->dbm, 6, 6))) {
1820 return false;
1822 if (sve_access_check(s)) {
1823 unsigned vsz = vec_full_reg_size(s);
1824 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1825 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1827 return true;
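/* Worked example (illustrative) of the bitmask-immediate decode used
 * above: immN = 1 selects a 64-bit element, imms = 7 requests 8
 * consecutive ones and immr = 0 applies no rotation, so the resulting
 * mask is 0x00000000000000ff, which is then applied to every 64-bit
 * lane of the vector.
 */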
1830 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1832 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1835 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1837 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1840 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1842 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1845 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1847 uint64_t imm;
1848 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1849 extract32(a->dbm, 0, 6),
1850 extract32(a->dbm, 6, 6))) {
1851 return false;
1853 if (sve_access_check(s)) {
1854 do_dupi_z(s, a->rd, imm);
1856 return true;
1860 *** SVE Integer Wide Immediate - Predicated Group
1863 /* Implement all merging copies. This is used for CPY (immediate),
1864 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1866 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1867 TCGv_i64 val)
1869 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1870 static gen_cpy * const fns[4] = {
1871 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1872 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1874 unsigned vsz = vec_full_reg_size(s);
1875 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1876 TCGv_ptr t_zd = tcg_temp_new_ptr();
1877 TCGv_ptr t_zn = tcg_temp_new_ptr();
1878 TCGv_ptr t_pg = tcg_temp_new_ptr();
1880 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1881 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1882 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1884 fns[esz](t_zd, t_zn, t_pg, val, desc);
1886 tcg_temp_free_ptr(t_zd);
1887 tcg_temp_free_ptr(t_zn);
1888 tcg_temp_free_ptr(t_pg);
1889 tcg_temp_free_i32(desc);
1892 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1894 if (a->esz == 0) {
1895 return false;
1897 if (sve_access_check(s)) {
1898 /* Decode the VFP immediate. */
1899 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1900 TCGv_i64 t_imm = tcg_const_i64(imm);
1901 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1902 tcg_temp_free_i64(t_imm);
1904 return true;
1907 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1909 if (a->esz == 0 && extract32(insn, 13, 1)) {
1910 return false;
1912 if (sve_access_check(s)) {
1913 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1914 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1915 tcg_temp_free_i64(t_imm);
1917 return true;
1920 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1922 static gen_helper_gvec_2i * const fns[4] = {
1923 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1924 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1927 if (a->esz == 0 && extract32(insn, 13, 1)) {
1928 return false;
1930 if (sve_access_check(s)) {
1931 unsigned vsz = vec_full_reg_size(s);
1932 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1933 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1934 pred_full_reg_offset(s, a->pg),
1935 t_imm, vsz, vsz, 0, fns[a->esz]);
1936 tcg_temp_free_i64(t_imm);
1938 return true;
1942 *** SVE Permute Extract Group
1945 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1947 if (!sve_access_check(s)) {
1948 return true;
1951 unsigned vsz = vec_full_reg_size(s);
1952 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1953 unsigned n_siz = vsz - n_ofs;
1954 unsigned d = vec_full_reg_offset(s, a->rd);
1955 unsigned n = vec_full_reg_offset(s, a->rn);
1956 unsigned m = vec_full_reg_offset(s, a->rm);
1958 /* Use host vector move insns if we have appropriate sizes
1959 * and no unfortunate overlap.
1961 if (m != d
1962 && n_ofs == size_for_gvec(n_ofs)
1963 && n_siz == size_for_gvec(n_siz)
1964 && (d != n || n_siz <= n_ofs)) {
1965 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1966 if (n_ofs != 0) {
1967 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1969 } else {
1970 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1972 return true;
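/* E.g. (illustrative): for EXT with imm = 3 the result is Zn bytes
 * 3..vsz-1 followed by Zm bytes 0..2, i.e. a byte-granular concatenation
 * starting imm bytes into the first operand; the fast path above handles
 * the case where both pieces are sizes the gvec mov can copy directly.
 */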
1976 *** SVE Permute - Unpredicated Group
1979 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1981 if (sve_access_check(s)) {
1982 unsigned vsz = vec_full_reg_size(s);
1983 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1984 vsz, vsz, cpu_reg_sp(s, a->rn));
1986 return true;
1989 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1991 if ((a->imm & 0x1f) == 0) {
1992 return false;
1994 if (sve_access_check(s)) {
1995 unsigned vsz = vec_full_reg_size(s);
1996 unsigned dofs = vec_full_reg_offset(s, a->rd);
1997 unsigned esz, index;
1999 esz = ctz32(a->imm);
2000 index = a->imm >> (esz + 1);
2002 if ((index << esz) < vsz) {
2003 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2004 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2005 } else {
2006 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2009 return true;
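/* Worked example (illustrative) of the index encoding above: the element
 * size is given by the position of the lowest set bit of a->imm and the
 * index by the bits above it.  E.g. a->imm = 0b00100 -> esz 2 (words),
 * index 0; a->imm = 0b01001 -> esz 0 (bytes), index 4.  An index beyond
 * the current vector length zeroes the destination.
 */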
2012 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2014 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2015 static gen_insr * const fns[4] = {
2016 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2017 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2019 unsigned vsz = vec_full_reg_size(s);
2020 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2021 TCGv_ptr t_zd = tcg_temp_new_ptr();
2022 TCGv_ptr t_zn = tcg_temp_new_ptr();
2024 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2025 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2027 fns[a->esz](t_zd, t_zn, val, desc);
2029 tcg_temp_free_ptr(t_zd);
2030 tcg_temp_free_ptr(t_zn);
2031 tcg_temp_free_i32(desc);
2034 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2036 if (sve_access_check(s)) {
2037 TCGv_i64 t = tcg_temp_new_i64();
2038 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2039 do_insr_i64(s, a, t);
2040 tcg_temp_free_i64(t);
2042 return true;
2045 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2047 if (sve_access_check(s)) {
2048 do_insr_i64(s, a, cpu_reg(s, a->rm));
2050 return true;
2053 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2055 static gen_helper_gvec_2 * const fns[4] = {
2056 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2057 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vsz, vsz, 0, fns[a->esz]);
2066 return true;
2069 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2071 static gen_helper_gvec_3 * const fns[4] = {
2072 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2073 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2076 if (sve_access_check(s)) {
2077 unsigned vsz = vec_full_reg_size(s);
2078 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2079 vec_full_reg_offset(s, a->rn),
2080 vec_full_reg_offset(s, a->rm),
2081 vsz, vsz, 0, fns[a->esz]);
2083 return true;
2086 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2088 static gen_helper_gvec_2 * const fns[4][2] = {
2089 { NULL, NULL },
2090 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2091 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2092 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2095 if (a->esz == 0) {
2096 return false;
2098 if (sve_access_check(s)) {
2099 unsigned vsz = vec_full_reg_size(s);
2100 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2101 vec_full_reg_offset(s, a->rn)
2102 + (a->h ? vsz / 2 : 0),
2103 vsz, vsz, 0, fns[a->esz][a->u]);
2105 return true;
2109 *** SVE Permute - Predicates Group
2112 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2113 gen_helper_gvec_3 *fn)
2115 if (!sve_access_check(s)) {
2116 return true;
2119 unsigned vsz = pred_full_reg_size(s);
2121 /* Predicate sizes may be smaller and cannot use simd_desc.
2122 We cannot round up, as we do elsewhere, because we need
2123 the exact size for ZIP2 and REV. We retain the style for
2124 the other helpers for consistency. */
2125 TCGv_ptr t_d = tcg_temp_new_ptr();
2126 TCGv_ptr t_n = tcg_temp_new_ptr();
2127 TCGv_ptr t_m = tcg_temp_new_ptr();
2128 TCGv_i32 t_desc;
2129 int desc;
2131 desc = vsz - 2;
2132 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2133 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2135 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2136 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2137 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2138 t_desc = tcg_const_i32(desc);
2140 fn(t_d, t_n, t_m, t_desc);
2142 tcg_temp_free_ptr(t_d);
2143 tcg_temp_free_ptr(t_n);
2144 tcg_temp_free_ptr(t_m);
2145 tcg_temp_free_i32(t_desc);
2146 return true;
2149 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2150 gen_helper_gvec_2 *fn)
2152 if (!sve_access_check(s)) {
2153 return true;
2156 unsigned vsz = pred_full_reg_size(s);
2157 TCGv_ptr t_d = tcg_temp_new_ptr();
2158 TCGv_ptr t_n = tcg_temp_new_ptr();
2159 TCGv_i32 t_desc;
2160 int desc;
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2165 /* Predicate sizes may be smaller and cannot use simd_desc.
2166 We cannot round up, as we do elsewhere, because we need
2167 the exact size for ZIP2 and REV. We retain the style for
2168 the other helpers for consistency. */
2170 desc = vsz - 2;
2171 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2172 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2173 t_desc = tcg_const_i32(desc);
2175 fn(t_d, t_n, t_desc);
2177 tcg_temp_free_i32(t_desc);
2178 tcg_temp_free_ptr(t_d);
2179 tcg_temp_free_ptr(t_n);
2180 return true;
2183 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2185 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2188 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2190 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2193 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2195 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2198 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2200 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2203 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2205 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2208 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2210 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2213 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2215 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2218 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2220 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2223 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2225 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2229 *** SVE Permute - Interleaving Group
2232 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2234 static gen_helper_gvec_3 * const fns[4] = {
2235 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2236 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2239 if (sve_access_check(s)) {
2240 unsigned vsz = vec_full_reg_size(s);
2241 unsigned high_ofs = high ? vsz / 2 : 0;
2242 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2243 vec_full_reg_offset(s, a->rn) + high_ofs,
2244 vec_full_reg_offset(s, a->rm) + high_ofs,
2245 vsz, vsz, 0, fns[a->esz]);
2247 return true;
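/*
 * Worked example of the half-register addressing used here and by
 * trans_UNPK above: with a 256-bit vector, vsz = 32 bytes, so ZIP2 and
 * UNPKHI read their source elements starting at byte offset vsz / 2 = 16
 * (the upper half of Zn/Zm), while ZIP1 and UNPKLO start at offset 0.
 */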
2250 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2251 gen_helper_gvec_3 *fn)
2253 if (sve_access_check(s)) {
2254 unsigned vsz = vec_full_reg_size(s);
2255 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2256 vec_full_reg_offset(s, a->rn),
2257 vec_full_reg_offset(s, a->rm),
2258 vsz, vsz, data, fn);
2260 return true;
2263 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2265 return do_zip(s, a, false);
2268 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2270 return do_zip(s, a, true);
2273 static gen_helper_gvec_3 * const uzp_fns[4] = {
2274 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2275 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2278 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2280 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2283 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2285 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2288 static gen_helper_gvec_3 * const trn_fns[4] = {
2289 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2290 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2293 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2295 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2298 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2300 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2304 *** SVE Permute Vector - Predicated Group
2307 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2309 static gen_helper_gvec_3 * const fns[4] = {
2310 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2312 return do_zpz_ool(s, a, fns[a->esz]);
2315 /* Call the helper that computes the ARM LastActiveElement pseudocode
2316 * function, scaled by the element size. This includes the not found
2317 * indication; e.g. not found for esz=3 is -8.
2319 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2321 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2322 * round up, as we do elsewhere, because we need the exact size.
2324 TCGv_ptr t_p = tcg_temp_new_ptr();
2325 TCGv_i32 t_desc;
2326 unsigned vsz = pred_full_reg_size(s);
2327 unsigned desc;
2329 desc = vsz - 2;
2330 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2332 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2333 t_desc = tcg_const_i32(desc);
2335 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2337 tcg_temp_free_i32(t_desc);
2338 tcg_temp_free_ptr(t_p);
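/*
 * Reference model (illustrative only, not the helper's actual code) of the
 * value computed above: the byte offset of the last active element, or
 * -(1 << esz) when no element is active.  Here oprsz is the predicate size
 * in bytes, as encoded in the descriptor, and PRED_BIT is a hypothetical
 * accessor for predicate bit I; one predicate bit governs each vector byte.
 *
 *   intptr_t last = -(intptr_t)(1 << esz);
 *   for (intptr_t i = 0; i < oprsz * 8; i += 1 << esz) {
 *       if (PRED_BIT(pg, i)) {
 *           last = i;
 *       }
 *   }
 *   return last;                    already scaled by the element size
 */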
2341 /* Increment LAST to the offset of the next element in the vector,
2342 * wrapping around to 0.
2344 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2346 unsigned vsz = vec_full_reg_size(s);
2348 tcg_gen_addi_i32(last, last, 1 << esz);
2349 if (is_power_of_2(vsz)) {
2350 tcg_gen_andi_i32(last, last, vsz - 1);
2351 } else {
2352 TCGv_i32 max = tcg_const_i32(vsz);
2353 TCGv_i32 zero = tcg_const_i32(0);
2354 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2355 tcg_temp_free_i32(max);
2356 tcg_temp_free_i32(zero);
2360 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2361 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2363 unsigned vsz = vec_full_reg_size(s);
2365 if (is_power_of_2(vsz)) {
2366 tcg_gen_andi_i32(last, last, vsz - 1);
2367 } else {
2368 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2369 TCGv_i32 zero = tcg_const_i32(0);
2370 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2371 tcg_temp_free_i32(max);
2372 tcg_temp_free_i32(zero);
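/*
 * Both helpers above exploit the same identity: when vsz is a power of
 * two, "x mod vsz" is "x & (vsz - 1)".  E.g. with vsz = 32 and esz = 2,
 * incrementing last = 28 by 4 gives 32 & 31 = 0, wrapping to the first
 * element; and with esz = 3 the "not found" value -8 becomes -8 & 31 = 24,
 * the offset of the last doubleword.  Non-power-of-two vector sizes fall
 * back to an explicit movcond.
 */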
2376 /* Load an unsigned element of ESZ from BASE+OFS. */
2377 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2379 TCGv_i64 r = tcg_temp_new_i64();
2381 switch (esz) {
2382 case 0:
2383 tcg_gen_ld8u_i64(r, base, ofs);
2384 break;
2385 case 1:
2386 tcg_gen_ld16u_i64(r, base, ofs);
2387 break;
2388 case 2:
2389 tcg_gen_ld32u_i64(r, base, ofs);
2390 break;
2391 case 3:
2392 tcg_gen_ld_i64(r, base, ofs);
2393 break;
2394 default:
2395 g_assert_not_reached();
2397 return r;
2400 /* Load an unsigned element of ESZ from RM[LAST]. */
2401 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2402 int rm, int esz)
2404 TCGv_ptr p = tcg_temp_new_ptr();
2405 TCGv_i64 r;
2407 /* Convert the offset within the vector register into an offset into ENV.
2408 * The final adjustment for the vector register base
2409 * is added via constant offset to the load.
2411 #ifdef HOST_WORDS_BIGENDIAN
2412 /* Adjust for element ordering. See vec_reg_offset. */
2413 if (esz < 3) {
2414 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2416 #endif
2417 tcg_gen_ext_i32_ptr(p, last);
2418 tcg_gen_add_ptr(p, p, cpu_env);
2420 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2421 tcg_temp_free_ptr(p);
2423 return r;
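/*
 * Worked example of the big-endian fixup above: within each 64-bit unit
 * of a Z register, the little-endian byte offset O of an element of size
 * (1 << esz) lives at host offset O ^ (8 - (1 << esz)) on a big-endian
 * host.  For halfwords (esz = 1) offsets 0, 2, 4, 6 map to 6, 4, 2, 0;
 * for doublewords (esz = 3) no adjustment is needed, hence the esz < 3
 * test.
 */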
2426 /* Compute CLAST for a Zreg. */
2427 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2429 TCGv_i32 last;
2430 TCGLabel *over;
2431 TCGv_i64 ele;
2432 unsigned vsz, esz = a->esz;
2434 if (!sve_access_check(s)) {
2435 return true;
2438 last = tcg_temp_local_new_i32();
2439 over = gen_new_label();
2441 find_last_active(s, last, esz, a->pg);
2443 /* There is of course no movcond for a 2048-bit vector,
2444 * so we must branch over the actual store.
2446 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2448 if (!before) {
2449 incr_last_active(s, last, esz);
2452 ele = load_last_active(s, last, a->rm, esz);
2453 tcg_temp_free_i32(last);
2455 vsz = vec_full_reg_size(s);
2456 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2457 tcg_temp_free_i64(ele);
2459 /* If this insn used MOVPRFX, we may need a second move. */
2460 if (a->rd != a->rn) {
2461 TCGLabel *done = gen_new_label();
2462 tcg_gen_br(done);
2464 gen_set_label(over);
2465 do_mov_z(s, a->rd, a->rn);
2467 gen_set_label(done);
2468 } else {
2469 gen_set_label(over);
2471 return true;
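/*
 * Rough model of the vector forms generated above (a sketch, not the
 * architectural pseudocode verbatim):
 *
 *   last = LastActiveElement(Pg);            < 0 when nothing is active
 *   if (last < 0) {
 *       Zd = Zn;                             keep the MOVPRFX source value
 *   } else {
 *       if (CLASTA) last = (last + esize) mod vsz;   element after last
 *       Zd = broadcast(Zm[last]);            replicate to every element
 *   }
 */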
2474 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2476 return do_clast_vector(s, a, false);
2479 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2481 return do_clast_vector(s, a, true);
2484 /* Compute CLAST for a scalar. */
2485 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2486 bool before, TCGv_i64 reg_val)
2488 TCGv_i32 last = tcg_temp_new_i32();
2489 TCGv_i64 ele, cmp, zero;
2491 find_last_active(s, last, esz, pg);
2493 /* Extend the original value of last prior to incrementing. */
2494 cmp = tcg_temp_new_i64();
2495 tcg_gen_ext_i32_i64(cmp, last);
2497 if (!before) {
2498 incr_last_active(s, last, esz);
2501 /* The conceit here is that while last < 0 indicates not found, after
2502 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2503 * from which we can load garbage. We then discard the garbage with
2504 * a conditional move.
2506 ele = load_last_active(s, last, rm, esz);
2507 tcg_temp_free_i32(last);
2509 zero = tcg_const_i64(0);
2510 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2512 tcg_temp_free_i64(zero);
2513 tcg_temp_free_i64(cmp);
2514 tcg_temp_free_i64(ele);
2517 /* Compute CLAST for a Vreg. */
2518 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2520 if (sve_access_check(s)) {
2521 int esz = a->esz;
2522 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2523 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2525 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2526 write_fp_dreg(s, a->rd, reg);
2527 tcg_temp_free_i64(reg);
2529 return true;
2532 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2534 return do_clast_fp(s, a, false);
2537 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2539 return do_clast_fp(s, a, true);
2542 /* Compute CLAST for a Xreg. */
2543 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2545 TCGv_i64 reg;
2547 if (!sve_access_check(s)) {
2548 return true;
2551 reg = cpu_reg(s, a->rd);
2552 switch (a->esz) {
2553 case 0:
2554 tcg_gen_ext8u_i64(reg, reg);
2555 break;
2556 case 1:
2557 tcg_gen_ext16u_i64(reg, reg);
2558 break;
2559 case 2:
2560 tcg_gen_ext32u_i64(reg, reg);
2561 break;
2562 case 3:
2563 break;
2564 default:
2565 g_assert_not_reached();
2568 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2569 return true;
2572 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2574 return do_clast_general(s, a, false);
2577 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2579 return do_clast_general(s, a, true);
2582 /* Compute LAST for a scalar. */
2583 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2584 int pg, int rm, bool before)
2586 TCGv_i32 last = tcg_temp_new_i32();
2587 TCGv_i64 ret;
2589 find_last_active(s, last, esz, pg);
2590 if (before) {
2591 wrap_last_active(s, last, esz);
2592 } else {
2593 incr_last_active(s, last, esz);
2596 ret = load_last_active(s, last, rm, esz);
2597 tcg_temp_free_i32(last);
2598 return ret;
2601 /* Compute LAST for a Vreg. */
2602 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2604 if (sve_access_check(s)) {
2605 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2606 write_fp_dreg(s, a->rd, val);
2607 tcg_temp_free_i64(val);
2609 return true;
2612 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2614 return do_last_fp(s, a, false);
2617 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2619 return do_last_fp(s, a, true);
2622 /* Compute LAST for a Xreg. */
2623 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2625 if (sve_access_check(s)) {
2626 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2627 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2628 tcg_temp_free_i64(val);
2630 return true;
2633 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2635 return do_last_general(s, a, false);
2638 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640 return do_last_general(s, a, true);
2643 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645 if (sve_access_check(s)) {
2646 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2648 return true;
2651 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2653 if (sve_access_check(s)) {
2654 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2655 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2656 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2657 tcg_temp_free_i64(t);
2659 return true;
2662 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664 static gen_helper_gvec_3 * const fns[4] = {
2665 NULL,
2666 gen_helper_sve_revb_h,
2667 gen_helper_sve_revb_s,
2668 gen_helper_sve_revb_d,
2670 return do_zpz_ool(s, a, fns[a->esz]);
2673 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 NULL,
2678 gen_helper_sve_revh_s,
2679 gen_helper_sve_revh_d,
2681 return do_zpz_ool(s, a, fns[a->esz]);
2684 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2686 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2689 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 gen_helper_sve_rbit_b,
2693 gen_helper_sve_rbit_h,
2694 gen_helper_sve_rbit_s,
2695 gen_helper_sve_rbit_d,
2697 return do_zpz_ool(s, a, fns[a->esz]);
2700 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2702 if (sve_access_check(s)) {
2703 unsigned vsz = vec_full_reg_size(s);
2704 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2705 vec_full_reg_offset(s, a->rn),
2706 vec_full_reg_offset(s, a->rm),
2707 pred_full_reg_offset(s, a->pg),
2708 vsz, vsz, a->esz, gen_helper_sve_splice);
2710 return true;
2714 *** SVE Integer Compare - Vectors Group
2717 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2718 gen_helper_gvec_flags_4 *gen_fn)
2720 TCGv_ptr pd, zn, zm, pg;
2721 unsigned vsz;
2722 TCGv_i32 t;
2724 if (gen_fn == NULL) {
2725 return false;
2727 if (!sve_access_check(s)) {
2728 return true;
2731 vsz = vec_full_reg_size(s);
2732 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2733 pd = tcg_temp_new_ptr();
2734 zn = tcg_temp_new_ptr();
2735 zm = tcg_temp_new_ptr();
2736 pg = tcg_temp_new_ptr();
2738 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2739 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2740 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2741 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2743 gen_fn(t, pd, zn, zm, pg, t);
2745 tcg_temp_free_ptr(pd);
2746 tcg_temp_free_ptr(zn);
2747 tcg_temp_free_ptr(zm);
2748 tcg_temp_free_ptr(pg);
2750 do_pred_flags(t);
2752 tcg_temp_free_i32(t);
2753 return true;
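/*
 * Flag setting for these compares follows the SVE predicate-test rule
 * (a sketch of the architectural definition, assuming the usual PTEST
 * semantics; do_pred_flags() earlier in this file copies the packed value
 * returned by the helper into the NZCV globals):
 *
 *   N = <first active element of Pd is true>
 *   Z = <no active element of Pd is true>
 *   C = !<last active element of Pd is true>
 *   V = 0
 */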
2756 #define DO_PPZZ(NAME, name) \
2757 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2758 uint32_t insn) \
2760 static gen_helper_gvec_flags_4 * const fns[4] = { \
2761 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2762 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2763 }; \
2764 return do_ppzz_flags(s, a, fns[a->esz]); \
2767 DO_PPZZ(CMPEQ, cmpeq)
2768 DO_PPZZ(CMPNE, cmpne)
2769 DO_PPZZ(CMPGT, cmpgt)
2770 DO_PPZZ(CMPGE, cmpge)
2771 DO_PPZZ(CMPHI, cmphi)
2772 DO_PPZZ(CMPHS, cmphs)
2774 #undef DO_PPZZ
2776 #define DO_PPZW(NAME, name) \
2777 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2778 uint32_t insn) \
2780 static gen_helper_gvec_flags_4 * const fns[4] = { \
2781 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2782 gen_helper_sve_##name##_ppzw_s, NULL \
2783 }; \
2784 return do_ppzz_flags(s, a, fns[a->esz]); \
2787 DO_PPZW(CMPEQ, cmpeq)
2788 DO_PPZW(CMPNE, cmpne)
2789 DO_PPZW(CMPGT, cmpgt)
2790 DO_PPZW(CMPGE, cmpge)
2791 DO_PPZW(CMPHI, cmphi)
2792 DO_PPZW(CMPHS, cmphs)
2793 DO_PPZW(CMPLT, cmplt)
2794 DO_PPZW(CMPLE, cmple)
2795 DO_PPZW(CMPLO, cmplo)
2796 DO_PPZW(CMPLS, cmpls)
2798 #undef DO_PPZW
2801 *** SVE Integer Compare - Immediate Groups
2804 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2805 gen_helper_gvec_flags_3 *gen_fn)
2807 TCGv_ptr pd, zn, pg;
2808 unsigned vsz;
2809 TCGv_i32 t;
2811 if (gen_fn == NULL) {
2812 return false;
2814 if (!sve_access_check(s)) {
2815 return true;
2818 vsz = vec_full_reg_size(s);
2819 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2820 pd = tcg_temp_new_ptr();
2821 zn = tcg_temp_new_ptr();
2822 pg = tcg_temp_new_ptr();
2824 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2825 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2826 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2828 gen_fn(t, pd, zn, pg, t);
2830 tcg_temp_free_ptr(pd);
2831 tcg_temp_free_ptr(zn);
2832 tcg_temp_free_ptr(pg);
2834 do_pred_flags(t);
2836 tcg_temp_free_i32(t);
2837 return true;
2840 #define DO_PPZI(NAME, name) \
2841 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2842 uint32_t insn) \
2844 static gen_helper_gvec_flags_3 * const fns[4] = { \
2845 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2846 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2847 }; \
2848 return do_ppzi_flags(s, a, fns[a->esz]); \
2851 DO_PPZI(CMPEQ, cmpeq)
2852 DO_PPZI(CMPNE, cmpne)
2853 DO_PPZI(CMPGT, cmpgt)
2854 DO_PPZI(CMPGE, cmpge)
2855 DO_PPZI(CMPHI, cmphi)
2856 DO_PPZI(CMPHS, cmphs)
2857 DO_PPZI(CMPLT, cmplt)
2858 DO_PPZI(CMPLE, cmple)
2859 DO_PPZI(CMPLO, cmplo)
2860 DO_PPZI(CMPLS, cmpls)
2862 #undef DO_PPZI
2865 *** SVE Partition Break Group
2868 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2869 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2871 if (!sve_access_check(s)) {
2872 return true;
2875 unsigned vsz = pred_full_reg_size(s);
2877 /* Predicate sizes may be smaller and cannot use simd_desc. */
2878 TCGv_ptr d = tcg_temp_new_ptr();
2879 TCGv_ptr n = tcg_temp_new_ptr();
2880 TCGv_ptr m = tcg_temp_new_ptr();
2881 TCGv_ptr g = tcg_temp_new_ptr();
2882 TCGv_i32 t = tcg_const_i32(vsz - 2);
2884 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2885 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2886 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2887 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2889 if (a->s) {
2890 fn_s(t, d, n, m, g, t);
2891 do_pred_flags(t);
2892 } else {
2893 fn(d, n, m, g, t);
2895 tcg_temp_free_ptr(d);
2896 tcg_temp_free_ptr(n);
2897 tcg_temp_free_ptr(m);
2898 tcg_temp_free_ptr(g);
2899 tcg_temp_free_i32(t);
2900 return true;
2903 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2904 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2906 if (!sve_access_check(s)) {
2907 return true;
2910 unsigned vsz = pred_full_reg_size(s);
2912 /* Predicate sizes may be smaller and cannot use simd_desc. */
2913 TCGv_ptr d = tcg_temp_new_ptr();
2914 TCGv_ptr n = tcg_temp_new_ptr();
2915 TCGv_ptr g = tcg_temp_new_ptr();
2916 TCGv_i32 t = tcg_const_i32(vsz - 2);
2918 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2919 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2920 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2922 if (a->s) {
2923 fn_s(t, d, n, g, t);
2924 do_pred_flags(t);
2925 } else {
2926 fn(d, n, g, t);
2928 tcg_temp_free_ptr(d);
2929 tcg_temp_free_ptr(n);
2930 tcg_temp_free_ptr(g);
2931 tcg_temp_free_i32(t);
2932 return true;
2935 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2937 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2940 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2942 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2945 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2947 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2950 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2952 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2955 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2957 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2960 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2962 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2965 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2967 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2971 *** SVE Predicate Count Group
2974 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2976 unsigned psz = pred_full_reg_size(s);
2978 if (psz <= 8) {
2979 uint64_t psz_mask;
2981 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2982 if (pn != pg) {
2983 TCGv_i64 g = tcg_temp_new_i64();
2984 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2985 tcg_gen_and_i64(val, val, g);
2986 tcg_temp_free_i64(g);
2989 /* Reduce the pred_esz_masks value simply to reduce the
2990 * size of the code generated here.
2992 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2993 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2995 tcg_gen_ctpop_i64(val, val);
2996 } else {
2997 TCGv_ptr t_pn = tcg_temp_new_ptr();
2998 TCGv_ptr t_pg = tcg_temp_new_ptr();
2999 unsigned desc;
3000 TCGv_i32 t_desc;
3002 desc = psz - 2;
3003 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3005 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3006 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3007 t_desc = tcg_const_i32(desc);
3009 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3010 tcg_temp_free_ptr(t_pn);
3011 tcg_temp_free_ptr(t_pg);
3012 tcg_temp_free_i32(t_desc);
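/*
 * Worked example of the inline (psz <= 8) path above: with a 256-bit
 * vector the predicate is 4 bytes, so for esz = MO_32 the element mask is
 * pred_esz_masks[2] = 0x1111111111111111 truncated to 32 bits by psz_mask,
 * and CNTP reduces to ctpop64(Pn & Pg & 0x11111111), i.e. counting the
 * low predicate bit of each active 4-byte element.
 */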
3016 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3018 if (sve_access_check(s)) {
3019 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3021 return true;
3024 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3025 uint32_t insn)
3027 if (sve_access_check(s)) {
3028 TCGv_i64 reg = cpu_reg(s, a->rd);
3029 TCGv_i64 val = tcg_temp_new_i64();
3031 do_cntp(s, val, a->esz, a->pg, a->pg);
3032 if (a->d) {
3033 tcg_gen_sub_i64(reg, reg, val);
3034 } else {
3035 tcg_gen_add_i64(reg, reg, val);
3037 tcg_temp_free_i64(val);
3039 return true;
3042 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3043 uint32_t insn)
3045 if (a->esz == 0) {
3046 return false;
3048 if (sve_access_check(s)) {
3049 unsigned vsz = vec_full_reg_size(s);
3050 TCGv_i64 val = tcg_temp_new_i64();
3051 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3053 do_cntp(s, val, a->esz, a->pg, a->pg);
3054 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3055 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3057 return true;
3060 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3061 uint32_t insn)
3063 if (sve_access_check(s)) {
3064 TCGv_i64 reg = cpu_reg(s, a->rd);
3065 TCGv_i64 val = tcg_temp_new_i64();
3067 do_cntp(s, val, a->esz, a->pg, a->pg);
3068 do_sat_addsub_32(reg, val, a->u, a->d);
3070 return true;
3073 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3074 uint32_t insn)
3076 if (sve_access_check(s)) {
3077 TCGv_i64 reg = cpu_reg(s, a->rd);
3078 TCGv_i64 val = tcg_temp_new_i64();
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 do_sat_addsub_64(reg, val, a->u, a->d);
3083 return true;
3086 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3087 uint32_t insn)
3089 if (a->esz == 0) {
3090 return false;
3092 if (sve_access_check(s)) {
3093 TCGv_i64 val = tcg_temp_new_i64();
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3097 return true;
3101 *** SVE Integer Compare Scalars Group
3104 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3106 if (!sve_access_check(s)) {
3107 return true;
3110 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3111 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3112 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3113 TCGv_i64 cmp = tcg_temp_new_i64();
3115 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3116 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3117 tcg_temp_free_i64(cmp);
3119 /* VF = !NF & !CF. */
3120 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3121 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3123 /* Both NF and VF actually look at bit 31. */
3124 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3125 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3126 return true;
3129 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3131 if (!sve_access_check(s)) {
3132 return true;
3135 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3136 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3137 TCGv_i64 t0 = tcg_temp_new_i64();
3138 TCGv_i64 t1 = tcg_temp_new_i64();
3139 TCGv_i32 t2, t3;
3140 TCGv_ptr ptr;
3141 unsigned desc, vsz = vec_full_reg_size(s);
3142 TCGCond cond;
3144 if (!a->sf) {
3145 if (a->u) {
3146 tcg_gen_ext32u_i64(op0, op0);
3147 tcg_gen_ext32u_i64(op1, op1);
3148 } else {
3149 tcg_gen_ext32s_i64(op0, op0);
3150 tcg_gen_ext32s_i64(op1, op1);
3154 /* For the helper, compress the different conditions into a computation
3155 * of the number of iterations for which the condition is true.
3157 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3158 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3159 * aren't that large, so any value >= predicate size is sufficient.
3161 tcg_gen_sub_i64(t0, op1, op0);
3163 /* t0 = MIN(op1 - op0, vsz). */
3164 tcg_gen_movi_i64(t1, vsz);
3165 tcg_gen_umin_i64(t0, t0, t1);
3166 if (a->eq) {
3167 /* Equality means one more iteration. */
3168 tcg_gen_addi_i64(t0, t0, 1);
3171 /* t0 = (condition true ? t0 : 0). */
3172 cond = (a->u
3173 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3174 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3175 tcg_gen_movi_i64(t1, 0);
3176 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3178 t2 = tcg_temp_new_i32();
3179 tcg_gen_extrl_i64_i32(t2, t0);
3180 tcg_temp_free_i64(t0);
3181 tcg_temp_free_i64(t1);
3183 desc = (vsz / 8) - 2;
3184 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3185 t3 = tcg_const_i32(desc);
3187 ptr = tcg_temp_new_ptr();
3188 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3190 gen_helper_sve_while(t2, ptr, t2, t3);
3191 do_pred_flags(t2);
3193 tcg_temp_free_ptr(ptr);
3194 tcg_temp_free_i32(t2);
3195 tcg_temp_free_i32(t3);
3196 return true;
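/*
 * Condensed model of the element count handed to gen_helper_sve_while()
 * (a sketch following the code above, not the architectural pseudocode):
 *
 *   t = MIN(op1 - op0, vsz);       clamp; vsz exceeds any element count
 *   if (eq) t += 1;                WHILELE/WHILELS include equality
 *   if (!(op0 cond op1)) t = 0;    condition false: empty predicate
 *
 * The helper then sets the first t elements of Pd, and the flags are
 * derived from the resulting predicate via do_pred_flags().
 */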
3200 *** SVE Integer Wide Immediate - Unpredicated Group
3203 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3205 if (a->esz == 0) {
3206 return false;
3208 if (sve_access_check(s)) {
3209 unsigned vsz = vec_full_reg_size(s);
3210 int dofs = vec_full_reg_offset(s, a->rd);
3211 uint64_t imm;
3213 /* Decode the VFP immediate. */
3214 imm = vfp_expand_imm(a->esz, a->imm);
3215 imm = dup_const(a->esz, imm);
3217 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3219 return true;
3222 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3224 if (a->esz == 0 && extract32(insn, 13, 1)) {
3225 return false;
3227 if (sve_access_check(s)) {
3228 unsigned vsz = vec_full_reg_size(s);
3229 int dofs = vec_full_reg_offset(s, a->rd);
3231 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3233 return true;
3236 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3238 if (a->esz == 0 && extract32(insn, 13, 1)) {
3239 return false;
3241 if (sve_access_check(s)) {
3242 unsigned vsz = vec_full_reg_size(s);
3243 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3244 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3246 return true;
3249 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3251 a->imm = -a->imm;
3252 return trans_ADD_zzi(s, a, insn);
3255 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3257 static const GVecGen2s op[4] = {
3258 { .fni8 = tcg_gen_vec_sub8_i64,
3259 .fniv = tcg_gen_sub_vec,
3260 .fno = gen_helper_sve_subri_b,
3261 .opc = INDEX_op_sub_vec,
3262 .vece = MO_8,
3263 .scalar_first = true },
3264 { .fni8 = tcg_gen_vec_sub16_i64,
3265 .fniv = tcg_gen_sub_vec,
3266 .fno = gen_helper_sve_subri_h,
3267 .opc = INDEX_op_sub_vec,
3268 .vece = MO_16,
3269 .scalar_first = true },
3270 { .fni4 = tcg_gen_sub_i32,
3271 .fniv = tcg_gen_sub_vec,
3272 .fno = gen_helper_sve_subri_s,
3273 .opc = INDEX_op_sub_vec,
3274 .vece = MO_32,
3275 .scalar_first = true },
3276 { .fni8 = tcg_gen_sub_i64,
3277 .fniv = tcg_gen_sub_vec,
3278 .fno = gen_helper_sve_subri_d,
3279 .opc = INDEX_op_sub_vec,
3280 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3281 .vece = MO_64,
3282 .scalar_first = true }
3285 if (a->esz == 0 && extract32(insn, 13, 1)) {
3286 return false;
3288 if (sve_access_check(s)) {
3289 unsigned vsz = vec_full_reg_size(s);
3290 TCGv_i64 c = tcg_const_i64(a->imm);
3291 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3292 vec_full_reg_offset(s, a->rn),
3293 vsz, vsz, c, &op[a->esz]);
3294 tcg_temp_free_i64(c);
3296 return true;
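/*
 * Design note: .scalar_first in the GVecGen2s descriptors above makes the
 * immediate the first operand of each subtraction, so the expansion is
 * effectively
 *
 *   Zd[i] = imm - Zn[i]
 *
 * which is the reversed subtract SUBR requires.  Plain SUB with an
 * immediate is instead handled by trans_SUB_zzi negating the immediate
 * and reusing trans_ADD_zzi.
 */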
3299 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3301 if (sve_access_check(s)) {
3302 unsigned vsz = vec_full_reg_size(s);
3303 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3304 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3306 return true;
3309 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3310 bool u, bool d)
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3313 return false;
3315 if (sve_access_check(s)) {
3316 TCGv_i64 val = tcg_const_i64(a->imm);
3317 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3318 tcg_temp_free_i64(val);
3320 return true;
3323 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3325 return do_zzi_sat(s, a, insn, false, false);
3328 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3330 return do_zzi_sat(s, a, insn, true, false);
3333 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3335 return do_zzi_sat(s, a, insn, false, true);
3338 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3340 return do_zzi_sat(s, a, insn, true, true);
3343 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3345 if (sve_access_check(s)) {
3346 unsigned vsz = vec_full_reg_size(s);
3347 TCGv_i64 c = tcg_const_i64(a->imm);
3349 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3350 vec_full_reg_offset(s, a->rn),
3351 c, vsz, vsz, 0, fn);
3352 tcg_temp_free_i64(c);
3354 return true;
3357 #define DO_ZZI(NAME, name) \
3358 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3359 uint32_t insn) \
3361 static gen_helper_gvec_2i * const fns[4] = { \
3362 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3363 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3364 }; \
3365 return do_zzi_ool(s, a, fns[a->esz]); \
3368 DO_ZZI(SMAX, smax)
3369 DO_ZZI(UMAX, umax)
3370 DO_ZZI(SMIN, smin)
3371 DO_ZZI(UMIN, umin)
3373 #undef DO_ZZI
3376 *** SVE Floating Point Arithmetic - Unpredicated Group
3379 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3380 gen_helper_gvec_3_ptr *fn)
3382 if (fn == NULL) {
3383 return false;
3385 if (sve_access_check(s)) {
3386 unsigned vsz = vec_full_reg_size(s);
3387 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3388 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3389 vec_full_reg_offset(s, a->rn),
3390 vec_full_reg_offset(s, a->rm),
3391 status, vsz, vsz, 0, fn);
3392 tcg_temp_free_ptr(status);
3394 return true;
3398 #define DO_FP3(NAME, name) \
3399 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3401 static gen_helper_gvec_3_ptr * const fns[4] = { \
3402 NULL, gen_helper_gvec_##name##_h, \
3403 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3404 }; \
3405 return do_zzz_fp(s, a, fns[a->esz]); \
3408 DO_FP3(FADD_zzz, fadd)
3409 DO_FP3(FSUB_zzz, fsub)
3410 DO_FP3(FMUL_zzz, fmul)
3411 DO_FP3(FTSMUL, ftsmul)
3412 DO_FP3(FRECPS, recps)
3413 DO_FP3(FRSQRTS, rsqrts)
3415 #undef DO_FP3
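/*
 * For reference, DO_FP3(FADD_zzz, fadd) above expands (whitespace aside) to:
 *
 *   static bool trans_FADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 *   {
 *       static gen_helper_gvec_3_ptr * const fns[4] = {
 *           NULL, gen_helper_gvec_fadd_h,
 *           gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_d
 *       };
 *       return do_zzz_fp(s, a, fns[a->esz]);
 *   }
 *
 * so the byte element size is rejected via the NULL entry, and the other
 * sizes dispatch to the shared gvec FP helpers with the fp_status pointer
 * supplied by do_zzz_fp().
 */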
3418 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3421 /* Subroutine loading a vector register at VOFS of LEN bytes.
3422 * The load should begin at the address Rn + IMM.
3425 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3426 int rn, int imm)
3428 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3429 uint32_t len_remain = len % 8;
3430 uint32_t nparts = len / 8 + ctpop8(len_remain);
3431 int midx = get_mem_index(s);
3432 TCGv_i64 addr, t0, t1;
3434 addr = tcg_temp_new_i64();
3435 t0 = tcg_temp_new_i64();
3437 /* Note that unpredicated load/store of vector/predicate registers
3438 * are defined as a stream of bytes, which equates to little-endian
3439 * operations on larger quantities. There is no nice way to force
3440 * a little-endian load for aarch64_be-linux-user out of line.
3442 * Attempt to keep code expansion to a minimum by limiting the
3443 * amount of unrolling done.
3445 if (nparts <= 4) {
3446 int i;
3448 for (i = 0; i < len_align; i += 8) {
3449 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3450 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3451 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3453 } else {
3454 TCGLabel *loop = gen_new_label();
3455 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3457 gen_set_label(loop);
3459 /* Minimize the number of local temps that must be re-read from
3460 * the stack each iteration. Instead, re-compute values other
3461 * than the loop counter.
3463 tp = tcg_temp_new_ptr();
3464 tcg_gen_addi_ptr(tp, i, imm);
3465 tcg_gen_extu_ptr_i64(addr, tp);
3466 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3468 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3470 tcg_gen_add_ptr(tp, cpu_env, i);
3471 tcg_gen_addi_ptr(i, i, 8);
3472 tcg_gen_st_i64(t0, tp, vofs);
3473 tcg_temp_free_ptr(tp);
3475 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3476 tcg_temp_free_ptr(i);
3479 /* Predicate register loads can be any multiple of 2.
3480 * Note that we still store the entire 64-bit unit into cpu_env.
3482 if (len_remain) {
3483 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3485 switch (len_remain) {
3486 case 2:
3487 case 4:
3488 case 8:
3489 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3490 break;
3492 case 6:
3493 t1 = tcg_temp_new_i64();
3494 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3495 tcg_gen_addi_i64(addr, addr, 4);
3496 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3497 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3498 tcg_temp_free_i64(t1);
3499 break;
3501 default:
3502 g_assert_not_reached();
3504 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3506 tcg_temp_free_i64(addr);
3507 tcg_temp_free_i64(t0);
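/*
 * Worked example of the remainder handling above: a predicate register is
 * one-eighth the size of a Z register, so with a 384-bit vector it is
 * 6 bytes.  do_ldr then sees len_align = 0 and len_remain = 6, which the
 * "case 6" path splits into one 4-byte and one 2-byte little-endian load.
 * Z registers are always a multiple of 16 bytes and never take that path.
 */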
3510 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3512 if (sve_access_check(s)) {
3513 int size = vec_full_reg_size(s);
3514 int off = vec_full_reg_offset(s, a->rd);
3515 do_ldr(s, off, size, a->rn, a->imm * size);
3517 return true;
3520 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3522 if (sve_access_check(s)) {
3523 int size = pred_full_reg_size(s);
3524 int off = pred_full_reg_offset(s, a->rd);
3525 do_ldr(s, off, size, a->rn, a->imm * size);
3527 return true;