target/arm: Implement SVE vector splice (predicated)
[qemu.git] / target/arm/translate-sve.c
blob 1517d82468b206f225a3c45edb9ca3fa29d95bcc
1 /*
2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 * Helpers for extracting complex instruction fields.
40 /* See e.g. ASR (immediate, predicated).
41 * Returns -1 for unallocated encoding; diagnose later.
43 static int tszimm_esz(int x)
45 x >>= 3; /* discard imm3 */
46 return 31 - clz32(x);
49 static int tszimm_shr(int x)
51 return (16 << tszimm_esz(x)) - x;
54 /* See e.g. LSL (immediate, predicated). */
55 static int tszimm_shl(int x)
57 return x - (8 << tszimm_esz(x));
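/* For example, with byte elements the tsz field is 0b0001, so the combined
 * tsz:imm3 value x lies in [8, 15]: tszimm_esz(x) == 0, tszimm_shr(x) == 16 - x
 * gives right-shift counts 1..8, and tszimm_shl(x) == x - 8 gives 0..7.
 */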
60 static inline int plus1(int x)
62 return x + 1;
65 /* The SH bit is in bit 8. Extract the low 8 and shift. */
66 static inline int expand_imm_sh8s(int x)
68 return (int8_t)x << (x & 0x100 ? 8 : 0);
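/* E.g. x == 0x080 yields -128 (sign-extended, SH clear), while x == 0x17f
 * yields 127 << 8 == 32512 (SH set).
 */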
72 * Include the generated decoder.
75 #include "decode-sve.inc.c"
78 * Implement all of the translator functions referenced by the decoder.
81 /* Return the offset into CPUARMState of the predicate vector register Pn.
82 * Note for this purpose, FFR is P16.
84 static inline int pred_full_reg_offset(DisasContext *s, int regno)
86 return offsetof(CPUARMState, vfp.pregs[regno]);
89 /* Return the byte size of the whole predicate register, VL / 64. */
90 static inline int pred_full_reg_size(DisasContext *s)
92 return s->sve_len >> 3;
95 /* Round up the size of a register to a size allowed by
96 * the tcg vector infrastructure. Any operation which uses this
97 * size may assume that the bits above pred_full_reg_size are zero,
98 * and must leave them the same way.
100 * Note that this is not needed for the vector registers as they
101 * are always properly sized for tcg vectors.
103 static int size_for_gvec(int size)
105 if (size <= 8) {
106 return 8;
107 } else {
108 return QEMU_ALIGN_UP(size, 16);
112 static int pred_gvec_reg_size(DisasContext *s)
114 return size_for_gvec(pred_full_reg_size(s));
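/* E.g. at a 256-bit vector length the predicate registers are 4 bytes, which
 * size_for_gvec rounds up to 8; at the 2048-bit maximum they are 32 bytes
 * and need no rounding.
 */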
117 /* Invoke a vector expander on two Zregs. */
118 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
119 int esz, int rd, int rn)
121 if (sve_access_check(s)) {
122 unsigned vsz = vec_full_reg_size(s);
123 gvec_fn(esz, vec_full_reg_offset(s, rd),
124 vec_full_reg_offset(s, rn), vsz, vsz);
126 return true;
129 /* Invoke a vector expander on three Zregs. */
130 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
131 int esz, int rd, int rn, int rm)
133 if (sve_access_check(s)) {
134 unsigned vsz = vec_full_reg_size(s);
135 gvec_fn(esz, vec_full_reg_offset(s, rd),
136 vec_full_reg_offset(s, rn),
137 vec_full_reg_offset(s, rm), vsz, vsz);
139 return true;
142 /* Invoke a vector move on two Zregs. */
143 static bool do_mov_z(DisasContext *s, int rd, int rn)
145 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
148 /* Initialize a Zreg with replications of a 64-bit immediate. */
149 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
151 unsigned vsz = vec_full_reg_size(s);
152 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
155 /* Invoke a vector expander on two Pregs. */
156 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
157 int esz, int rd, int rn)
159 if (sve_access_check(s)) {
160 unsigned psz = pred_gvec_reg_size(s);
161 gvec_fn(esz, pred_full_reg_offset(s, rd),
162 pred_full_reg_offset(s, rn), psz, psz);
164 return true;
167 /* Invoke a vector expander on three Pregs. */
168 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
169 int esz, int rd, int rn, int rm)
171 if (sve_access_check(s)) {
172 unsigned psz = pred_gvec_reg_size(s);
173 gvec_fn(esz, pred_full_reg_offset(s, rd),
174 pred_full_reg_offset(s, rn),
175 pred_full_reg_offset(s, rm), psz, psz);
177 return true;
180 /* Invoke a vector operation on four Pregs. */
181 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
182 int rd, int rn, int rm, int rg)
184 if (sve_access_check(s)) {
185 unsigned psz = pred_gvec_reg_size(s);
186 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
187 pred_full_reg_offset(s, rn),
188 pred_full_reg_offset(s, rm),
189 pred_full_reg_offset(s, rg),
190 psz, psz, gvec_op);
192 return true;
195 /* Invoke a vector move on two Pregs. */
196 static bool do_mov_p(DisasContext *s, int rd, int rn)
198 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
201 /* Set the cpu flags as per a return from an SVE helper. */
202 static void do_pred_flags(TCGv_i32 t)
204 tcg_gen_mov_i32(cpu_NF, t);
205 tcg_gen_andi_i32(cpu_ZF, t, 2);
206 tcg_gen_andi_i32(cpu_CF, t, 1);
207 tcg_gen_movi_i32(cpu_VF, 0);
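/* As used here: bit 31 of the helper result becomes N, Z is set when bit 1
 * is clear (via the cpu_ZF == 0 convention), bit 0 becomes C, and V is
 * always zero.
 */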
210 /* Subroutines computing the ARM PredTest pseudofunction. */
211 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
213 TCGv_i32 t = tcg_temp_new_i32();
215 gen_helper_sve_predtest1(t, d, g);
216 do_pred_flags(t);
217 tcg_temp_free_i32(t);
220 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
222 TCGv_ptr dptr = tcg_temp_new_ptr();
223 TCGv_ptr gptr = tcg_temp_new_ptr();
224 TCGv_i32 t;
226 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
227 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
228 t = tcg_const_i32(words);
230 gen_helper_sve_predtest(t, dptr, gptr, t);
231 tcg_temp_free_ptr(dptr);
232 tcg_temp_free_ptr(gptr);
234 do_pred_flags(t);
235 tcg_temp_free_i32(t);
238 /* For each element size, the bits within a predicate word that are active. */
239 const uint64_t pred_esz_masks[4] = {
240 0xffffffffffffffffull, 0x5555555555555555ull,
241 0x1111111111111111ull, 0x0101010101010101ull
245 *** SVE Logical - Unpredicated Group
248 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
250 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
253 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
255 if (a->rn == a->rm) { /* MOV */
256 return do_mov_z(s, a->rd, a->rn);
257 } else {
258 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
262 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
264 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
267 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
269 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
273 *** SVE Integer Arithmetic - Unpredicated Group
276 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
278 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
281 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
283 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
286 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
288 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
291 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
293 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
296 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
298 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
301 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
303 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
307 *** SVE Integer Arithmetic - Binary Predicated Group
310 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
312 unsigned vsz = vec_full_reg_size(s);
313 if (fn == NULL) {
314 return false;
316 if (sve_access_check(s)) {
317 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
318 vec_full_reg_offset(s, a->rn),
319 vec_full_reg_offset(s, a->rm),
320 pred_full_reg_offset(s, a->pg),
321 vsz, vsz, 0, fn);
323 return true;
326 #define DO_ZPZZ(NAME, name) \
327 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
328 uint32_t insn) \
330 static gen_helper_gvec_4 * const fns[4] = { \
331 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
332 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
333 }; \
334 return do_zpzz_ool(s, a, fns[a->esz]); \
337 DO_ZPZZ(AND, and)
338 DO_ZPZZ(EOR, eor)
339 DO_ZPZZ(ORR, orr)
340 DO_ZPZZ(BIC, bic)
342 DO_ZPZZ(ADD, add)
343 DO_ZPZZ(SUB, sub)
345 DO_ZPZZ(SMAX, smax)
346 DO_ZPZZ(UMAX, umax)
347 DO_ZPZZ(SMIN, smin)
348 DO_ZPZZ(UMIN, umin)
349 DO_ZPZZ(SABD, sabd)
350 DO_ZPZZ(UABD, uabd)
352 DO_ZPZZ(MUL, mul)
353 DO_ZPZZ(SMULH, smulh)
354 DO_ZPZZ(UMULH, umulh)
356 DO_ZPZZ(ASR, asr)
357 DO_ZPZZ(LSR, lsr)
358 DO_ZPZZ(LSL, lsl)
360 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
362 static gen_helper_gvec_4 * const fns[4] = {
363 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
365 return do_zpzz_ool(s, a, fns[a->esz]);
368 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
370 static gen_helper_gvec_4 * const fns[4] = {
371 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
373 return do_zpzz_ool(s, a, fns[a->esz]);
376 #undef DO_ZPZZ
379 *** SVE Integer Arithmetic - Unary Predicated Group
382 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
384 if (fn == NULL) {
385 return false;
387 if (sve_access_check(s)) {
388 unsigned vsz = vec_full_reg_size(s);
389 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
390 vec_full_reg_offset(s, a->rn),
391 pred_full_reg_offset(s, a->pg),
392 vsz, vsz, 0, fn);
394 return true;
397 #define DO_ZPZ(NAME, name) \
398 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
400 static gen_helper_gvec_3 * const fns[4] = { \
401 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
402 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
403 }; \
404 return do_zpz_ool(s, a, fns[a->esz]); \
407 DO_ZPZ(CLS, cls)
408 DO_ZPZ(CLZ, clz)
409 DO_ZPZ(CNT_zpz, cnt_zpz)
410 DO_ZPZ(CNOT, cnot)
411 DO_ZPZ(NOT_zpz, not_zpz)
412 DO_ZPZ(ABS, abs)
413 DO_ZPZ(NEG, neg)
415 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
417 static gen_helper_gvec_3 * const fns[4] = {
418 NULL,
419 gen_helper_sve_fabs_h,
420 gen_helper_sve_fabs_s,
421 gen_helper_sve_fabs_d
423 return do_zpz_ool(s, a, fns[a->esz]);
426 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
428 static gen_helper_gvec_3 * const fns[4] = {
429 NULL,
430 gen_helper_sve_fneg_h,
431 gen_helper_sve_fneg_s,
432 gen_helper_sve_fneg_d
434 return do_zpz_ool(s, a, fns[a->esz]);
437 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
439 static gen_helper_gvec_3 * const fns[4] = {
440 NULL,
441 gen_helper_sve_sxtb_h,
442 gen_helper_sve_sxtb_s,
443 gen_helper_sve_sxtb_d
445 return do_zpz_ool(s, a, fns[a->esz]);
448 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
450 static gen_helper_gvec_3 * const fns[4] = {
451 NULL,
452 gen_helper_sve_uxtb_h,
453 gen_helper_sve_uxtb_s,
454 gen_helper_sve_uxtb_d
456 return do_zpz_ool(s, a, fns[a->esz]);
459 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
461 static gen_helper_gvec_3 * const fns[4] = {
462 NULL, NULL,
463 gen_helper_sve_sxth_s,
464 gen_helper_sve_sxth_d
466 return do_zpz_ool(s, a, fns[a->esz]);
469 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
471 static gen_helper_gvec_3 * const fns[4] = {
472 NULL, NULL,
473 gen_helper_sve_uxth_s,
474 gen_helper_sve_uxth_d
476 return do_zpz_ool(s, a, fns[a->esz]);
479 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
481 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
484 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
486 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
489 #undef DO_ZPZ
492 *** SVE Integer Reduction Group
495 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
496 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
497 gen_helper_gvec_reduc *fn)
499 unsigned vsz = vec_full_reg_size(s);
500 TCGv_ptr t_zn, t_pg;
501 TCGv_i32 desc;
502 TCGv_i64 temp;
504 if (fn == NULL) {
505 return false;
507 if (!sve_access_check(s)) {
508 return true;
511 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
512 temp = tcg_temp_new_i64();
513 t_zn = tcg_temp_new_ptr();
514 t_pg = tcg_temp_new_ptr();
516 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
517 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
518 fn(temp, t_zn, t_pg, desc);
519 tcg_temp_free_ptr(t_zn);
520 tcg_temp_free_ptr(t_pg);
521 tcg_temp_free_i32(desc);
523 write_fp_dreg(s, a->rd, temp);
524 tcg_temp_free_i64(temp);
525 return true;
528 #define DO_VPZ(NAME, name) \
529 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
531 static gen_helper_gvec_reduc * const fns[4] = { \
532 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
533 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
534 }; \
535 return do_vpz_ool(s, a, fns[a->esz]); \
538 DO_VPZ(ORV, orv)
539 DO_VPZ(ANDV, andv)
540 DO_VPZ(EORV, eorv)
542 DO_VPZ(UADDV, uaddv)
543 DO_VPZ(SMAXV, smaxv)
544 DO_VPZ(UMAXV, umaxv)
545 DO_VPZ(SMINV, sminv)
546 DO_VPZ(UMINV, uminv)
548 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
550 static gen_helper_gvec_reduc * const fns[4] = {
551 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
552 gen_helper_sve_saddv_s, NULL
554 return do_vpz_ool(s, a, fns[a->esz]);
557 #undef DO_VPZ
560 *** SVE Shift by Immediate - Predicated Group
563 /* Store zero into every active element of Zd. We will use this for two
564 * and three-operand predicated instructions for which logic dictates a
565 * zero result.
567 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
569 static gen_helper_gvec_2 * const fns[4] = {
570 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
571 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
573 if (sve_access_check(s)) {
574 unsigned vsz = vec_full_reg_size(s);
575 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
576 pred_full_reg_offset(s, pg),
577 vsz, vsz, 0, fns[esz]);
579 return true;
582 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
583 gen_helper_gvec_3 *fn)
585 if (sve_access_check(s)) {
586 unsigned vsz = vec_full_reg_size(s);
587 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
588 vec_full_reg_offset(s, a->rn),
589 pred_full_reg_offset(s, a->pg),
590 vsz, vsz, a->imm, fn);
592 return true;
595 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
597 static gen_helper_gvec_3 * const fns[4] = {
598 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
599 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
601 if (a->esz < 0) {
602 /* Invalid tsz encoding -- see tszimm_esz. */
603 return false;
605 /* Shift by element size is architecturally valid. For
606 arithmetic right-shift, it's the same as by one less. */
607 a->imm = MIN(a->imm, (8 << a->esz) - 1);
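/* E.g. for byte elements an immediate of 8 is clamped to 7 here; since the
 * shift replicates the sign bit, the result is unchanged.
 */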
608 return do_zpzi_ool(s, a, fns[a->esz]);
611 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
613 static gen_helper_gvec_3 * const fns[4] = {
614 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
615 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
617 if (a->esz < 0) {
618 return false;
620 /* Shift by element size is architecturally valid.
621 For logical shifts, it is a zeroing operation. */
622 if (a->imm >= (8 << a->esz)) {
623 return do_clr_zp(s, a->rd, a->pg, a->esz);
624 } else {
625 return do_zpzi_ool(s, a, fns[a->esz]);
629 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
631 static gen_helper_gvec_3 * const fns[4] = {
632 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
633 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
635 if (a->esz < 0) {
636 return false;
638 /* Shift by element size is architecturally valid.
639 For logical shifts, it is a zeroing operation. */
640 if (a->imm >= (8 << a->esz)) {
641 return do_clr_zp(s, a->rd, a->pg, a->esz);
642 } else {
643 return do_zpzi_ool(s, a, fns[a->esz]);
647 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
649 static gen_helper_gvec_3 * const fns[4] = {
650 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
651 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
653 if (a->esz < 0) {
654 return false;
656 /* Shift by element size is architecturally valid. For arithmetic
657 right shift for division, it is a zeroing operation. */
658 if (a->imm >= (8 << a->esz)) {
659 return do_clr_zp(s, a->rd, a->pg, a->esz);
660 } else {
661 return do_zpzi_ool(s, a, fns[a->esz]);
666 *** SVE Bitwise Shift - Predicated Group
669 #define DO_ZPZW(NAME, name) \
670 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
671 uint32_t insn) \
673 static gen_helper_gvec_4 * const fns[3] = { \
674 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
675 gen_helper_sve_##name##_zpzw_s, \
676 }; \
677 if (a->esz < 0 || a->esz >= 3) { \
678 return false; \
680 return do_zpzz_ool(s, a, fns[a->esz]); \
683 DO_ZPZW(ASR, asr)
684 DO_ZPZW(LSR, lsr)
685 DO_ZPZW(LSL, lsl)
687 #undef DO_ZPZW
690 *** SVE Bitwise Shift - Unpredicated Group
693 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
694 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
695 int64_t, uint32_t, uint32_t))
697 if (a->esz < 0) {
698 /* Invalid tsz encoding -- see tszimm_esz. */
699 return false;
701 if (sve_access_check(s)) {
702 unsigned vsz = vec_full_reg_size(s);
703 /* Shift by element size is architecturally valid. For
704 arithmetic right-shift, it's the same as by one less.
705 Otherwise it is a zeroing operation. */
706 if (a->imm >= 8 << a->esz) {
707 if (asr) {
708 a->imm = (8 << a->esz) - 1;
709 } else {
710 do_dupi_z(s, a->rd, 0);
711 return true;
714 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
715 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
717 return true;
720 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
722 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
725 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
727 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
730 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
732 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
735 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
737 if (fn == NULL) {
738 return false;
740 if (sve_access_check(s)) {
741 unsigned vsz = vec_full_reg_size(s);
742 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
743 vec_full_reg_offset(s, a->rn),
744 vec_full_reg_offset(s, a->rm),
745 vsz, vsz, 0, fn);
747 return true;
750 #define DO_ZZW(NAME, name) \
751 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
752 uint32_t insn) \
754 static gen_helper_gvec_3 * const fns[4] = { \
755 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
756 gen_helper_sve_##name##_zzw_s, NULL \
757 }; \
758 return do_zzw_ool(s, a, fns[a->esz]); \
761 DO_ZZW(ASR, asr)
762 DO_ZZW(LSR, lsr)
763 DO_ZZW(LSL, lsl)
765 #undef DO_ZZW
768 *** SVE Integer Multiply-Add Group
771 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
772 gen_helper_gvec_5 *fn)
774 if (sve_access_check(s)) {
775 unsigned vsz = vec_full_reg_size(s);
776 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
777 vec_full_reg_offset(s, a->ra),
778 vec_full_reg_offset(s, a->rn),
779 vec_full_reg_offset(s, a->rm),
780 pred_full_reg_offset(s, a->pg),
781 vsz, vsz, 0, fn);
783 return true;
786 #define DO_ZPZZZ(NAME, name) \
787 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
789 static gen_helper_gvec_5 * const fns[4] = { \
790 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
791 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
792 }; \
793 return do_zpzzz_ool(s, a, fns[a->esz]); \
796 DO_ZPZZZ(MLA, mla)
797 DO_ZPZZZ(MLS, mls)
799 #undef DO_ZPZZZ
802 *** SVE Index Generation Group
805 static void do_index(DisasContext *s, int esz, int rd,
806 TCGv_i64 start, TCGv_i64 incr)
808 unsigned vsz = vec_full_reg_size(s);
809 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
810 TCGv_ptr t_zd = tcg_temp_new_ptr();
812 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
813 if (esz == 3) {
814 gen_helper_sve_index_d(t_zd, start, incr, desc);
815 } else {
816 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
817 static index_fn * const fns[3] = {
818 gen_helper_sve_index_b,
819 gen_helper_sve_index_h,
820 gen_helper_sve_index_s,
822 TCGv_i32 s32 = tcg_temp_new_i32();
823 TCGv_i32 i32 = tcg_temp_new_i32();
825 tcg_gen_extrl_i64_i32(s32, start);
826 tcg_gen_extrl_i64_i32(i32, incr);
827 fns[esz](t_zd, s32, i32, desc);
829 tcg_temp_free_i32(s32);
830 tcg_temp_free_i32(i32);
832 tcg_temp_free_ptr(t_zd);
833 tcg_temp_free_i32(desc);
836 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
838 if (sve_access_check(s)) {
839 TCGv_i64 start = tcg_const_i64(a->imm1);
840 TCGv_i64 incr = tcg_const_i64(a->imm2);
841 do_index(s, a->esz, a->rd, start, incr);
842 tcg_temp_free_i64(start);
843 tcg_temp_free_i64(incr);
845 return true;
848 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
850 if (sve_access_check(s)) {
851 TCGv_i64 start = tcg_const_i64(a->imm);
852 TCGv_i64 incr = cpu_reg(s, a->rm);
853 do_index(s, a->esz, a->rd, start, incr);
854 tcg_temp_free_i64(start);
856 return true;
859 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
861 if (sve_access_check(s)) {
862 TCGv_i64 start = cpu_reg(s, a->rn);
863 TCGv_i64 incr = tcg_const_i64(a->imm);
864 do_index(s, a->esz, a->rd, start, incr);
865 tcg_temp_free_i64(incr);
867 return true;
870 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
872 if (sve_access_check(s)) {
873 TCGv_i64 start = cpu_reg(s, a->rn);
874 TCGv_i64 incr = cpu_reg(s, a->rm);
875 do_index(s, a->esz, a->rd, start, incr);
877 return true;
881 *** SVE Stack Allocation Group
884 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
886 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
887 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
888 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
889 return true;
892 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
894 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
895 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
896 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
897 return true;
900 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
902 TCGv_i64 reg = cpu_reg(s, a->rd);
903 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
904 return true;
908 *** SVE Compute Vector Address Group
911 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
913 if (sve_access_check(s)) {
914 unsigned vsz = vec_full_reg_size(s);
915 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
916 vec_full_reg_offset(s, a->rn),
917 vec_full_reg_offset(s, a->rm),
918 vsz, vsz, a->imm, fn);
920 return true;
923 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
925 return do_adr(s, a, gen_helper_sve_adr_p32);
928 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
930 return do_adr(s, a, gen_helper_sve_adr_p64);
933 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
935 return do_adr(s, a, gen_helper_sve_adr_s32);
938 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
940 return do_adr(s, a, gen_helper_sve_adr_u32);
944 *** SVE Integer Misc - Unpredicated Group
947 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
949 static gen_helper_gvec_2 * const fns[4] = {
950 NULL,
951 gen_helper_sve_fexpa_h,
952 gen_helper_sve_fexpa_s,
953 gen_helper_sve_fexpa_d,
955 if (a->esz == 0) {
956 return false;
958 if (sve_access_check(s)) {
959 unsigned vsz = vec_full_reg_size(s);
960 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
961 vec_full_reg_offset(s, a->rn),
962 vsz, vsz, 0, fns[a->esz]);
964 return true;
967 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
969 static gen_helper_gvec_3 * const fns[4] = {
970 NULL,
971 gen_helper_sve_ftssel_h,
972 gen_helper_sve_ftssel_s,
973 gen_helper_sve_ftssel_d,
975 if (a->esz == 0) {
976 return false;
978 if (sve_access_check(s)) {
979 unsigned vsz = vec_full_reg_size(s);
980 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
981 vec_full_reg_offset(s, a->rn),
982 vec_full_reg_offset(s, a->rm),
983 vsz, vsz, 0, fns[a->esz]);
985 return true;
989 *** SVE Predicate Logical Operations Group
992 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
993 const GVecGen4 *gvec_op)
995 if (!sve_access_check(s)) {
996 return true;
999 unsigned psz = pred_gvec_reg_size(s);
1000 int dofs = pred_full_reg_offset(s, a->rd);
1001 int nofs = pred_full_reg_offset(s, a->rn);
1002 int mofs = pred_full_reg_offset(s, a->rm);
1003 int gofs = pred_full_reg_offset(s, a->pg);
1005 if (psz == 8) {
1006 /* Do the operation and the flags generation in temps. */
1007 TCGv_i64 pd = tcg_temp_new_i64();
1008 TCGv_i64 pn = tcg_temp_new_i64();
1009 TCGv_i64 pm = tcg_temp_new_i64();
1010 TCGv_i64 pg = tcg_temp_new_i64();
1012 tcg_gen_ld_i64(pn, cpu_env, nofs);
1013 tcg_gen_ld_i64(pm, cpu_env, mofs);
1014 tcg_gen_ld_i64(pg, cpu_env, gofs);
1016 gvec_op->fni8(pd, pn, pm, pg);
1017 tcg_gen_st_i64(pd, cpu_env, dofs);
1019 do_predtest1(pd, pg);
1021 tcg_temp_free_i64(pd);
1022 tcg_temp_free_i64(pn);
1023 tcg_temp_free_i64(pm);
1024 tcg_temp_free_i64(pg);
1025 } else {
1026 /* The operation and flags generation is large. The computation
1027 * of the flags depends on the original contents of the guarding
1028 * predicate. If the destination overwrites the guarding predicate,
1029 * then the easiest way to get this right is to save a copy.
1031 int tofs = gofs;
1032 if (a->rd == a->pg) {
1033 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1034 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1037 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1038 do_predtest(s, dofs, tofs, psz / 8);
1040 return true;
1043 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1045 tcg_gen_and_i64(pd, pn, pm);
1046 tcg_gen_and_i64(pd, pd, pg);
1049 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1050 TCGv_vec pm, TCGv_vec pg)
1052 tcg_gen_and_vec(vece, pd, pn, pm);
1053 tcg_gen_and_vec(vece, pd, pd, pg);
1056 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1058 static const GVecGen4 op = {
1059 .fni8 = gen_and_pg_i64,
1060 .fniv = gen_and_pg_vec,
1061 .fno = gen_helper_sve_and_pppp,
1062 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1064 if (a->s) {
1065 return do_pppp_flags(s, a, &op);
1066 } else if (a->rn == a->rm) {
1067 if (a->pg == a->rn) {
1068 return do_mov_p(s, a->rd, a->rn);
1069 } else {
1070 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1072 } else if (a->pg == a->rn || a->pg == a->rm) {
1073 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1074 } else {
1075 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1079 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1081 tcg_gen_andc_i64(pd, pn, pm);
1082 tcg_gen_and_i64(pd, pd, pg);
1085 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1086 TCGv_vec pm, TCGv_vec pg)
1088 tcg_gen_andc_vec(vece, pd, pn, pm);
1089 tcg_gen_and_vec(vece, pd, pd, pg);
1092 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1094 static const GVecGen4 op = {
1095 .fni8 = gen_bic_pg_i64,
1096 .fniv = gen_bic_pg_vec,
1097 .fno = gen_helper_sve_bic_pppp,
1098 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1100 if (a->s) {
1101 return do_pppp_flags(s, a, &op);
1102 } else if (a->pg == a->rn) {
1103 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1104 } else {
1105 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1109 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 tcg_gen_xor_i64(pd, pn, pm);
1112 tcg_gen_and_i64(pd, pd, pg);
1115 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116 TCGv_vec pm, TCGv_vec pg)
1118 tcg_gen_xor_vec(vece, pd, pn, pm);
1119 tcg_gen_and_vec(vece, pd, pd, pg);
1122 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 static const GVecGen4 op = {
1125 .fni8 = gen_eor_pg_i64,
1126 .fniv = gen_eor_pg_vec,
1127 .fno = gen_helper_sve_eor_pppp,
1128 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130 if (a->s) {
1131 return do_pppp_flags(s, a, &op);
1132 } else {
1133 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1137 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1139 tcg_gen_and_i64(pn, pn, pg);
1140 tcg_gen_andc_i64(pm, pm, pg);
1141 tcg_gen_or_i64(pd, pn, pm);
1144 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1145 TCGv_vec pm, TCGv_vec pg)
1147 tcg_gen_and_vec(vece, pn, pn, pg);
1148 tcg_gen_andc_vec(vece, pm, pm, pg);
1149 tcg_gen_or_vec(vece, pd, pn, pm);
1152 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1154 static const GVecGen4 op = {
1155 .fni8 = gen_sel_pg_i64,
1156 .fniv = gen_sel_pg_vec,
1157 .fno = gen_helper_sve_sel_pppp,
1158 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1160 if (a->s) {
1161 return false;
1162 } else {
1163 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1167 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1169 tcg_gen_or_i64(pd, pn, pm);
1170 tcg_gen_and_i64(pd, pd, pg);
1173 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1174 TCGv_vec pm, TCGv_vec pg)
1176 tcg_gen_or_vec(vece, pd, pn, pm);
1177 tcg_gen_and_vec(vece, pd, pd, pg);
1180 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1182 static const GVecGen4 op = {
1183 .fni8 = gen_orr_pg_i64,
1184 .fniv = gen_orr_pg_vec,
1185 .fno = gen_helper_sve_orr_pppp,
1186 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1188 if (a->s) {
1189 return do_pppp_flags(s, a, &op);
1190 } else if (a->pg == a->rn && a->rn == a->rm) {
1191 return do_mov_p(s, a->rd, a->rn);
1192 } else {
1193 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1197 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1199 tcg_gen_orc_i64(pd, pn, pm);
1200 tcg_gen_and_i64(pd, pd, pg);
1203 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1204 TCGv_vec pm, TCGv_vec pg)
1206 tcg_gen_orc_vec(vece, pd, pn, pm);
1207 tcg_gen_and_vec(vece, pd, pd, pg);
1210 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1212 static const GVecGen4 op = {
1213 .fni8 = gen_orn_pg_i64,
1214 .fniv = gen_orn_pg_vec,
1215 .fno = gen_helper_sve_orn_pppp,
1216 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1218 if (a->s) {
1219 return do_pppp_flags(s, a, &op);
1220 } else {
1221 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1225 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1227 tcg_gen_or_i64(pd, pn, pm);
1228 tcg_gen_andc_i64(pd, pg, pd);
1231 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1232 TCGv_vec pm, TCGv_vec pg)
1234 tcg_gen_or_vec(vece, pd, pn, pm);
1235 tcg_gen_andc_vec(vece, pd, pg, pd);
1238 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1240 static const GVecGen4 op = {
1241 .fni8 = gen_nor_pg_i64,
1242 .fniv = gen_nor_pg_vec,
1243 .fno = gen_helper_sve_nor_pppp,
1244 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1246 if (a->s) {
1247 return do_pppp_flags(s, a, &op);
1248 } else {
1249 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1253 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1255 tcg_gen_and_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1259 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1262 tcg_gen_and_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1266 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nand_pg_i64,
1270 .fniv = gen_nand_pg_vec,
1271 .fno = gen_helper_sve_nand_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1274 if (a->s) {
1275 return do_pppp_flags(s, a, &op);
1276 } else {
1277 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1282 *** SVE Predicate Misc Group
1285 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1287 if (sve_access_check(s)) {
1288 int nofs = pred_full_reg_offset(s, a->rn);
1289 int gofs = pred_full_reg_offset(s, a->pg);
1290 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1292 if (words == 1) {
1293 TCGv_i64 pn = tcg_temp_new_i64();
1294 TCGv_i64 pg = tcg_temp_new_i64();
1296 tcg_gen_ld_i64(pn, cpu_env, nofs);
1297 tcg_gen_ld_i64(pg, cpu_env, gofs);
1298 do_predtest1(pn, pg);
1300 tcg_temp_free_i64(pn);
1301 tcg_temp_free_i64(pg);
1302 } else {
1303 do_predtest(s, nofs, gofs, words);
1306 return true;
1309 /* See the ARM pseudocode DecodePredCount. */
1310 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1312 unsigned elements = fullsz >> esz;
1313 unsigned bound;
1315 switch (pattern) {
1316 case 0x0: /* POW2 */
1317 return pow2floor(elements);
1318 case 0x1: /* VL1 */
1319 case 0x2: /* VL2 */
1320 case 0x3: /* VL3 */
1321 case 0x4: /* VL4 */
1322 case 0x5: /* VL5 */
1323 case 0x6: /* VL6 */
1324 case 0x7: /* VL7 */
1325 case 0x8: /* VL8 */
1326 bound = pattern;
1327 break;
1328 case 0x9: /* VL16 */
1329 case 0xa: /* VL32 */
1330 case 0xb: /* VL64 */
1331 case 0xc: /* VL128 */
1332 case 0xd: /* VL256 */
1333 bound = 16 << (pattern - 9);
1334 break;
1335 case 0x1d: /* MUL4 */
1336 return elements - elements % 4;
1337 case 0x1e: /* MUL3 */
1338 return elements - elements % 3;
1339 case 0x1f: /* ALL */
1340 return elements;
1341 default: /* #uimm5 */
1342 return 0;
1344 return elements >= bound ? bound : 0;
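/* E.g. with 32 elements: POW2 gives 32, VL7 gives 7, VL256 gives 0 (the
 * vector is too short), MUL3 gives 30, and ALL gives 32.
 */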
1347 /* This handles all of the predicate initialization instructions,
1348 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1349 * so that decode_pred_count returns 0. For SETFFR, we will have
1350 * set RD == 16 == FFR.
1352 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1354 if (!sve_access_check(s)) {
1355 return true;
1358 unsigned fullsz = vec_full_reg_size(s);
1359 unsigned ofs = pred_full_reg_offset(s, rd);
1360 unsigned numelem, setsz, i;
1361 uint64_t word, lastword;
1362 TCGv_i64 t;
1364 numelem = decode_pred_count(fullsz, pat, esz);
1366 /* Determine what we must store into each bit, and how many. */
1367 if (numelem == 0) {
1368 lastword = word = 0;
1369 setsz = fullsz;
1370 } else {
1371 setsz = numelem << esz;
1372 lastword = word = pred_esz_masks[esz];
1373 if (setsz % 64) {
1374 lastword &= ~(-1ull << (setsz % 64));
1378 t = tcg_temp_new_i64();
1379 if (fullsz <= 64) {
1380 tcg_gen_movi_i64(t, lastword);
1381 tcg_gen_st_i64(t, cpu_env, ofs);
1382 goto done;
1385 if (word == lastword) {
1386 unsigned maxsz = size_for_gvec(fullsz / 8);
1387 unsigned oprsz = size_for_gvec(setsz / 8);
1389 if (oprsz * 8 == setsz) {
1390 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1391 goto done;
1393 if (oprsz * 8 == setsz + 8) {
1394 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1395 tcg_gen_movi_i64(t, 0);
1396 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1397 goto done;
1401 setsz /= 8;
1402 fullsz /= 8;
1404 tcg_gen_movi_i64(t, word);
1405 for (i = 0; i < setsz; i += 8) {
1406 tcg_gen_st_i64(t, cpu_env, ofs + i);
1408 if (lastword != word) {
1409 tcg_gen_movi_i64(t, lastword);
1410 tcg_gen_st_i64(t, cpu_env, ofs + i);
1411 i += 8;
1413 if (i < fullsz) {
1414 tcg_gen_movi_i64(t, 0);
1415 for (; i < fullsz; i += 8) {
1416 tcg_gen_st_i64(t, cpu_env, ofs + i);
1420 done:
1421 tcg_temp_free_i64(t);
1423 /* PTRUES */
1424 if (setflag) {
1425 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1426 tcg_gen_movi_i32(cpu_CF, word == 0);
1427 tcg_gen_movi_i32(cpu_VF, 0);
1428 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1430 return true;
1433 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1435 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1438 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1440 /* Note pat == 31 is #all, to set all elements. */
1441 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1444 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1446 /* Note pat == 32 is #unimp, to set no elements. */
1447 return do_predset(s, 0, a->rd, 32, false);
1450 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1452 /* The path through do_pppp_flags is complicated enough to want to avoid
1453 * duplication. Frob the arguments into the form of a predicated AND.
1455 arg_rprr_s alt_a = {
1456 .rd = a->rd, .pg = a->pg, .s = a->s,
1457 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1459 return trans_AND_pppp(s, &alt_a, insn);
1462 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1464 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1467 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1469 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1472 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1473 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1474 TCGv_ptr, TCGv_i32))
1476 if (!sve_access_check(s)) {
1477 return true;
1480 TCGv_ptr t_pd = tcg_temp_new_ptr();
1481 TCGv_ptr t_pg = tcg_temp_new_ptr();
1482 TCGv_i32 t;
1483 unsigned desc;
1485 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1486 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1488 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1489 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1490 t = tcg_const_i32(desc);
1492 gen_fn(t, t_pd, t_pg, t);
1493 tcg_temp_free_ptr(t_pd);
1494 tcg_temp_free_ptr(t_pg);
1496 do_pred_flags(t);
1497 tcg_temp_free_i32(t);
1498 return true;
1501 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1503 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1506 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1508 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1512 *** SVE Element Count Group
1515 /* Perform an inline saturating addition of a 32-bit value within
1516 * a 64-bit register. The second operand is known to be positive,
1517 * which halves the comparisons we must perform to bound the result.
1519 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1521 int64_t ibound;
1522 TCGv_i64 bound;
1523 TCGCond cond;
1525 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1526 if (u) {
1527 tcg_gen_ext32u_i64(reg, reg);
1528 } else {
1529 tcg_gen_ext32s_i64(reg, reg);
1531 if (d) {
1532 tcg_gen_sub_i64(reg, reg, val);
1533 ibound = (u ? 0 : INT32_MIN);
1534 cond = TCG_COND_LT;
1535 } else {
1536 tcg_gen_add_i64(reg, reg, val);
1537 ibound = (u ? UINT32_MAX : INT32_MAX);
1538 cond = TCG_COND_GT;
1540 bound = tcg_const_i64(ibound);
1541 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1542 tcg_temp_free_i64(bound);
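/* Since the 32-bit operand was widened to 64 bits and val is positive, the
 * exact result always fits in 64 bits, so a single movcond against the
 * relevant bound is enough to saturate it.
 */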
1545 /* Similarly with 64-bit values. */
1546 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1548 TCGv_i64 t0 = tcg_temp_new_i64();
1549 TCGv_i64 t1 = tcg_temp_new_i64();
1550 TCGv_i64 t2;
1552 if (u) {
1553 if (d) {
1554 tcg_gen_sub_i64(t0, reg, val);
1555 tcg_gen_movi_i64(t1, 0);
1556 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1557 } else {
1558 tcg_gen_add_i64(t0, reg, val);
1559 tcg_gen_movi_i64(t1, -1);
1560 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1562 } else {
1563 if (d) {
1564 /* Detect signed overflow for subtraction. */
1565 tcg_gen_xor_i64(t0, reg, val);
1566 tcg_gen_sub_i64(t1, reg, val);
1567 tcg_gen_xor_i64(reg, reg, t1);
1568 tcg_gen_and_i64(t0, t0, reg);
1570 /* Bound the result. */
1571 tcg_gen_movi_i64(reg, INT64_MIN);
1572 t2 = tcg_const_i64(0);
1573 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1574 } else {
1575 /* Detect signed overflow for addition. */
1576 tcg_gen_xor_i64(t0, reg, val);
1577 tcg_gen_add_i64(reg, reg, val);
1578 tcg_gen_xor_i64(t1, reg, val);
1579 tcg_gen_andc_i64(t0, t1, t0);
1581 /* Bound the result. */
1582 tcg_gen_movi_i64(t1, INT64_MAX);
1583 t2 = tcg_const_i64(0);
1584 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1586 tcg_temp_free_i64(t2);
1588 tcg_temp_free_i64(t0);
1589 tcg_temp_free_i64(t1);
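/* The signed cases use the usual xor trick: addition overflowed iff
 * ((res ^ val) & ~(reg ^ val)) has bit 63 set, subtraction iff
 * ((reg ^ val) & (reg ^ res)) does; the movcond then substitutes the
 * saturated bound when that bit is set.
 */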
1592 /* Similarly with a vector and a scalar operand. */
1593 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1594 TCGv_i64 val, bool u, bool d)
1596 unsigned vsz = vec_full_reg_size(s);
1597 TCGv_ptr dptr, nptr;
1598 TCGv_i32 t32, desc;
1599 TCGv_i64 t64;
1601 dptr = tcg_temp_new_ptr();
1602 nptr = tcg_temp_new_ptr();
1603 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1604 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1605 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1607 switch (esz) {
1608 case MO_8:
1609 t32 = tcg_temp_new_i32();
1610 tcg_gen_extrl_i64_i32(t32, val);
1611 if (d) {
1612 tcg_gen_neg_i32(t32, t32);
1614 if (u) {
1615 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1616 } else {
1617 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1619 tcg_temp_free_i32(t32);
1620 break;
1622 case MO_16:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1628 if (u) {
1629 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1633 tcg_temp_free_i32(t32);
1634 break;
1636 case MO_32:
1637 t64 = tcg_temp_new_i64();
1638 if (d) {
1639 tcg_gen_neg_i64(t64, val);
1640 } else {
1641 tcg_gen_mov_i64(t64, val);
1643 if (u) {
1644 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1645 } else {
1646 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1648 tcg_temp_free_i64(t64);
1649 break;
1651 case MO_64:
1652 if (u) {
1653 if (d) {
1654 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1655 } else {
1656 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1658 } else if (d) {
1659 t64 = tcg_temp_new_i64();
1660 tcg_gen_neg_i64(t64, val);
1661 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1662 tcg_temp_free_i64(t64);
1663 } else {
1664 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1666 break;
1668 default:
1669 g_assert_not_reached();
1672 tcg_temp_free_ptr(dptr);
1673 tcg_temp_free_ptr(nptr);
1674 tcg_temp_free_i32(desc);
1677 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1679 if (sve_access_check(s)) {
1680 unsigned fullsz = vec_full_reg_size(s);
1681 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1682 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1684 return true;
1687 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1689 if (sve_access_check(s)) {
1690 unsigned fullsz = vec_full_reg_size(s);
1691 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1692 int inc = numelem * a->imm * (a->d ? -1 : 1);
1693 TCGv_i64 reg = cpu_reg(s, a->rd);
1695 tcg_gen_addi_i64(reg, reg, inc);
1697 return true;
1700 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1701 uint32_t insn)
1703 if (!sve_access_check(s)) {
1704 return true;
1707 unsigned fullsz = vec_full_reg_size(s);
1708 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1709 int inc = numelem * a->imm;
1710 TCGv_i64 reg = cpu_reg(s, a->rd);
1712 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1713 if (inc == 0) {
1714 if (a->u) {
1715 tcg_gen_ext32u_i64(reg, reg);
1716 } else {
1717 tcg_gen_ext32s_i64(reg, reg);
1719 } else {
1720 TCGv_i64 t = tcg_const_i64(inc);
1721 do_sat_addsub_32(reg, t, a->u, a->d);
1722 tcg_temp_free_i64(t);
1724 return true;
1727 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1728 uint32_t insn)
1730 if (!sve_access_check(s)) {
1731 return true;
1734 unsigned fullsz = vec_full_reg_size(s);
1735 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1736 int inc = numelem * a->imm;
1737 TCGv_i64 reg = cpu_reg(s, a->rd);
1739 if (inc != 0) {
1740 TCGv_i64 t = tcg_const_i64(inc);
1741 do_sat_addsub_64(reg, t, a->u, a->d);
1742 tcg_temp_free_i64(t);
1744 return true;
1747 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1749 if (a->esz == 0) {
1750 return false;
1753 unsigned fullsz = vec_full_reg_size(s);
1754 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1755 int inc = numelem * a->imm;
1757 if (inc != 0) {
1758 if (sve_access_check(s)) {
1759 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1760 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1761 vec_full_reg_offset(s, a->rn),
1762 t, fullsz, fullsz);
1763 tcg_temp_free_i64(t);
1765 } else {
1766 do_mov_z(s, a->rd, a->rn);
1768 return true;
1771 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1772 uint32_t insn)
1774 if (a->esz == 0) {
1775 return false;
1778 unsigned fullsz = vec_full_reg_size(s);
1779 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1780 int inc = numelem * a->imm;
1782 if (inc != 0) {
1783 if (sve_access_check(s)) {
1784 TCGv_i64 t = tcg_const_i64(inc);
1785 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1786 tcg_temp_free_i64(t);
1788 } else {
1789 do_mov_z(s, a->rd, a->rn);
1791 return true;
1795 *** SVE Bitwise Immediate Group
1798 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1800 uint64_t imm;
1801 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1802 extract32(a->dbm, 0, 6),
1803 extract32(a->dbm, 6, 6))) {
1804 return false;
1806 if (sve_access_check(s)) {
1807 unsigned vsz = vec_full_reg_size(s);
1808 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1809 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1811 return true;
1814 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1816 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1819 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1821 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1824 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1826 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1829 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1831 uint64_t imm;
1832 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1833 extract32(a->dbm, 0, 6),
1834 extract32(a->dbm, 6, 6))) {
1835 return false;
1837 if (sve_access_check(s)) {
1838 do_dupi_z(s, a->rd, imm);
1840 return true;
1844 *** SVE Integer Wide Immediate - Predicated Group
1847 /* Implement all merging copies. This is used for CPY (immediate),
1848 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1850 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1851 TCGv_i64 val)
1853 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1854 static gen_cpy * const fns[4] = {
1855 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1856 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1858 unsigned vsz = vec_full_reg_size(s);
1859 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1860 TCGv_ptr t_zd = tcg_temp_new_ptr();
1861 TCGv_ptr t_zn = tcg_temp_new_ptr();
1862 TCGv_ptr t_pg = tcg_temp_new_ptr();
1864 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1865 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1866 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1868 fns[esz](t_zd, t_zn, t_pg, val, desc);
1870 tcg_temp_free_ptr(t_zd);
1871 tcg_temp_free_ptr(t_zn);
1872 tcg_temp_free_ptr(t_pg);
1873 tcg_temp_free_i32(desc);
1876 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1878 if (a->esz == 0) {
1879 return false;
1881 if (sve_access_check(s)) {
1882 /* Decode the VFP immediate. */
1883 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1884 TCGv_i64 t_imm = tcg_const_i64(imm);
1885 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1886 tcg_temp_free_i64(t_imm);
1888 return true;
1891 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1893 if (a->esz == 0 && extract32(insn, 13, 1)) {
1894 return false;
1896 if (sve_access_check(s)) {
1897 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1898 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1899 tcg_temp_free_i64(t_imm);
1901 return true;
1904 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1906 static gen_helper_gvec_2i * const fns[4] = {
1907 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1908 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1911 if (a->esz == 0 && extract32(insn, 13, 1)) {
1912 return false;
1914 if (sve_access_check(s)) {
1915 unsigned vsz = vec_full_reg_size(s);
1916 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1917 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1918 pred_full_reg_offset(s, a->pg),
1919 t_imm, vsz, vsz, 0, fns[a->esz]);
1920 tcg_temp_free_i64(t_imm);
1922 return true;
1926 *** SVE Permute Extract Group
1929 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1931 if (!sve_access_check(s)) {
1932 return true;
1935 unsigned vsz = vec_full_reg_size(s);
1936 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1937 unsigned n_siz = vsz - n_ofs;
1938 unsigned d = vec_full_reg_offset(s, a->rd);
1939 unsigned n = vec_full_reg_offset(s, a->rn);
1940 unsigned m = vec_full_reg_offset(s, a->rm);
1942 /* Use host vector move insns if we have appropriate sizes
1943 * and no unfortunate overlap.
1945 if (m != d
1946 && n_ofs == size_for_gvec(n_ofs)
1947 && n_siz == size_for_gvec(n_siz)
1948 && (d != n || n_siz <= n_ofs)) {
1949 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1950 if (n_ofs != 0) {
1951 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1953 } else {
1954 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1956 return true;
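/* E.g. with a 32-byte vector and imm == 16 both pieces are gvec-sized: the
 * low 16 bytes of Zd come from Zn[16..31] and the high 16 bytes from
 * Zm[0..15], so two vector moves implement the EXT.
 */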
1960 *** SVE Permute - Unpredicated Group
1963 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
1965 if (sve_access_check(s)) {
1966 unsigned vsz = vec_full_reg_size(s);
1967 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1968 vsz, vsz, cpu_reg_sp(s, a->rn));
1970 return true;
1973 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
1975 if ((a->imm & 0x1f) == 0) {
1976 return false;
1978 if (sve_access_check(s)) {
1979 unsigned vsz = vec_full_reg_size(s);
1980 unsigned dofs = vec_full_reg_offset(s, a->rd);
1981 unsigned esz, index;
1983 esz = ctz32(a->imm);
1984 index = a->imm >> (esz + 1);
1986 if ((index << esz) < vsz) {
1987 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1988 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
1989 } else {
1990 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
1993 return true;
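/* The immediate encodes both the element size and the index, e.g.
 * imm == 0b00110 gives esz == 1 (halfwords) and index == 1, selecting
 * Zn.H[1]; an index beyond the vector length zeroes Zd.
 */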
1996 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
1998 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1999 static gen_insr * const fns[4] = {
2000 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2001 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2003 unsigned vsz = vec_full_reg_size(s);
2004 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2005 TCGv_ptr t_zd = tcg_temp_new_ptr();
2006 TCGv_ptr t_zn = tcg_temp_new_ptr();
2008 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2009 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2011 fns[a->esz](t_zd, t_zn, val, desc);
2013 tcg_temp_free_ptr(t_zd);
2014 tcg_temp_free_ptr(t_zn);
2015 tcg_temp_free_i32(desc);
2018 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2020 if (sve_access_check(s)) {
2021 TCGv_i64 t = tcg_temp_new_i64();
2022 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2023 do_insr_i64(s, a, t);
2024 tcg_temp_free_i64(t);
2026 return true;
2029 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2031 if (sve_access_check(s)) {
2032 do_insr_i64(s, a, cpu_reg(s, a->rm));
2034 return true;
2037 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2039 static gen_helper_gvec_2 * const fns[4] = {
2040 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2041 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2044 if (sve_access_check(s)) {
2045 unsigned vsz = vec_full_reg_size(s);
2046 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2047 vec_full_reg_offset(s, a->rn),
2048 vsz, vsz, 0, fns[a->esz]);
2050 return true;
2053 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2055 static gen_helper_gvec_3 * const fns[4] = {
2056 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2057 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2060 if (sve_access_check(s)) {
2061 unsigned vsz = vec_full_reg_size(s);
2062 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2063 vec_full_reg_offset(s, a->rn),
2064 vec_full_reg_offset(s, a->rm),
2065 vsz, vsz, 0, fns[a->esz]);
2067 return true;
2070 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2072 static gen_helper_gvec_2 * const fns[4][2] = {
2073 { NULL, NULL },
2074 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2075 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2076 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2079 if (a->esz == 0) {
2080 return false;
2082 if (sve_access_check(s)) {
2083 unsigned vsz = vec_full_reg_size(s);
2084 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2085 vec_full_reg_offset(s, a->rn)
2086 + (a->h ? vsz / 2 : 0),
2087 vsz, vsz, 0, fns[a->esz][a->u]);
2089 return true;
2093 *** SVE Permute - Predicates Group
2096 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2097 gen_helper_gvec_3 *fn)
2099 if (!sve_access_check(s)) {
2100 return true;
2103 unsigned vsz = pred_full_reg_size(s);
2105 /* Predicate sizes may be smaller and cannot use simd_desc.
2106 We cannot round up, as we do elsewhere, because we need
2107 the exact size for ZIP2 and REV. We retain the style for
2108 the other helpers for consistency. */
2109 TCGv_ptr t_d = tcg_temp_new_ptr();
2110 TCGv_ptr t_n = tcg_temp_new_ptr();
2111 TCGv_ptr t_m = tcg_temp_new_ptr();
2112 TCGv_i32 t_desc;
2113 int desc;
2115 desc = vsz - 2;
2116 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2117 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2119 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2120 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2121 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2122 t_desc = tcg_const_i32(desc);
2124 fn(t_d, t_n, t_m, t_desc);
2126 tcg_temp_free_ptr(t_d);
2127 tcg_temp_free_ptr(t_n);
2128 tcg_temp_free_ptr(t_m);
2129 tcg_temp_free_i32(t_desc);
2130 return true;
2133 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2134 gen_helper_gvec_2 *fn)
2136 if (!sve_access_check(s)) {
2137 return true;
2140 unsigned vsz = pred_full_reg_size(s);
2141 TCGv_ptr t_d = tcg_temp_new_ptr();
2142 TCGv_ptr t_n = tcg_temp_new_ptr();
2143 TCGv_i32 t_desc;
2144 int desc;
2146 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2147 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2149 /* Predicate sizes may be smaller and cannot use simd_desc.
2150 We cannot round up, as we do elsewhere, because we need
2151 the exact size for ZIP2 and REV. We retain the style for
2152 the other helpers for consistency. */
2154 desc = vsz - 2;
2155 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2156 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2157 t_desc = tcg_const_i32(desc);
2159 fn(t_d, t_n, t_desc);
2161 tcg_temp_free_i32(t_desc);
2162 tcg_temp_free_ptr(t_d);
2163 tcg_temp_free_ptr(t_n);
2164 return true;

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
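
/* Expand a three-Zreg out-of-line helper, passing DATA through the
 * descriptor.  UZP2 and TRN2 use this to pass 1 << esz, which the
 * helpers presumably use to select the odd-numbered elements.
 */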

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/*
 *** SVE Permute Vector - Predicated Group
 */
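
/* COMPACT is defined only for word and doubleword elements; the NULL
 * entries rely on do_zpz_ool treating a missing helper as an
 * unallocated encoding.
 */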

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
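
/* CLASTA/CLASTB for a Zreg: replicate the last active element of Zm
 * (CLASTB), or the element following it with wraparound (CLASTA), into
 * every element of Zd.  If no element is active, Zd becomes a copy of
 * Zn instead.
 */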

/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}

/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}

/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
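
/* LASTA/LASTB: unlike CLAST there is no merge with the old value.
 * When no element is active, find_last_active returns a negative index;
 * wrap_last_active (LASTB) turns that into the last element of the
 * vector and incr_last_active (LASTA) into element 0, so a valid
 * element is always loaded.
 */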

/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
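
/* CPY (scalar and SIMD&FP scalar, merging): copy the source value into
 * the active elements of Zd, leaving inactive elements unchanged.  Both
 * forms reuse do_cpy_m with Zd as its own merge input.
 */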

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
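
/* SPLICE: copy the active segment of Zn (from the first through the
 * last active element of Pg) to the low elements of Zd, then fill the
 * remainder from the leading elements of Zm.  A single out-of-line
 * helper handles all element sizes; the element size is passed in the
 * descriptor data field (a->esz).
 */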

static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */
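
/* do_ldr transfers a whole Zreg or Preg as a stream of bytes.  Small
 * transfers are fully unrolled; larger ones use a TCG-level loop over
 * 8-byte units, with any tail smaller than 8 bytes handled separately.
 */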

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
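
/* LDR (vector) and LDR (predicate): the immediate offset is scaled by
 * the current register size, so the effective address is Rn plus
 * imm times the full Zreg (or Preg) size in bytes.
 */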

static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}