target/arm: Implement SVE Element Count Group
target/arm/translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
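/* Worked example (annotation, not in the original): a .s element has
 * tsz in [4, 7], so tszimm_esz returns 31 - clz32(4..7) = 2.  With
 * esize = 8 << esz = 32, tszimm_shr maps x to 2 * esize - x, giving
 * right-shift counts 1..32, and tszimm_shl maps x to x - esize, giving
 * left-shift counts 0..31.  A tsz of 0 gives clz32(0) == 32 and thus
 * the -1 "unallocated" marker.
 */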
static inline int plus1(int x)
{
    return x + 1;
}
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
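/* Example (annotation): with a 256-bit vector length, s->sve_len is 32
 * bytes, so each predicate register occupies 32 / 8 = 4 bytes -- one
 * predicate bit per vector byte.
 */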
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
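/* For instance (annotation): a 4-byte predicate is widened to 8 bytes
 * and a 20-byte predicate to 32, matching the 8- and 16-byte operand
 * granules the gvec expanders accept.
 */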
/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}
/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}
/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}
/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
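/* Annotation: the PredTest helpers pack the flags so that bit 31 is N,
 * bit 1 is set exactly when Z should be clear, and bit 0 is C.  QEMU's
 * cpu_ZF convention (zero means Z set) lets the unpacking above be a
 * move and two ANDs, with V always zero.
 */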
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
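/* Annotation: a predicate holds one bit per vector byte, and only the
 * bit for the least significant byte of each element is significant;
 * hence every bit is active for .b, every second (0x55...) for .h,
 * every fourth (0x11...) for .s, and every eighth (0x01...) for .d.
 */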
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                             \
{                                                                          \
    static gen_helper_gvec_4 * const fns[4] = {                            \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,    \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,    \
    };                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                 \
}
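/* Annotation: each DO_ZPZZ instance expands to a translator that picks
 * the out-of-line helper for the decoded element size; for example
 * DO_ZPZZ(ADD, add) defines trans_ADD_zpzz, selecting one of
 * gen_helper_sve_add_zpzz_{b,h,s,d}.
 */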
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
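/* Annotation: there is no 64-bit SADDV encoding; sign-extending a .d
 * element into the 64-bit accumulator is an identity, so UADDV already
 * produces the same result for that size.
 */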
#undef DO_VPZ
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                             \
{                                                                          \
    static gen_helper_gvec_4 * const fns[3] = {                            \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,    \
        gen_helper_sve_##name##_zpzw_s,                                    \
    };                                                                     \
    if (a->esz < 0 || a->esz >= 3) {                                       \
        return false;                                                      \
    }                                                                      \
    return do_zpzz_ool(s, a, fns[a->esz]);                                 \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
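/* Annotation: e.g. lsr z0.b, z1.b, #8 shifts every byte by its full
 * width and so expands to a zeroing dup, while asr z0.b, z1.b, #8 is
 * clamped to a shift by 7, which yields the same all-sign-bits result.
 */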
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                              \
{                                                                          \
    static gen_helper_gvec_3 * const fns[4] = {                            \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,      \
        gen_helper_sve_##name##_zzw_s, NULL                                \
    };                                                                     \
    return do_zzw_ool(s, a, fns[a->esz]);                                  \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
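/* Annotation: with a 256-bit vector length, addvl x0, x1, #3 adds
 * 3 * 32 = 96 bytes and addpl x0, x1, #3 adds 3 * 4 = 12 bytes;
 * rdvl x0, #1 simply loads 32.
 */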
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
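/* Annotation: with 256-bit vectors and esz = 2 there are 8 .s elements;
 * pattern POW2 then yields 8, VL7 yields 7, MUL3 yields 6, and VL16
 * yields 0 because the bound exceeds the element count.
 */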
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
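/* Annotation: because both operands are first widened to 64 bits, the
 * exact result cannot wrap, so a single compare against the 32-bit
 * bound suffices; e.g. an unsigned decrement by four elements applied
 * to a register holding 2 computes 2 - 4 = -2 < 0 and clamps to 0.
 */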
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
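/* Annotation: the signed paths use the classic XOR trick: for
 * reg - val, overflow occurred iff the operands had different signs
 * and the result's sign differs from reg's, i.e.
 * ((reg ^ val) & (reg ^ result)) < 0; the movcond then selects the
 * saturated bound whenever that sign bit is set.
 */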
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */
static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
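/* Annotation: a 6-byte remainder can occur for predicate registers,
 * e.g. a 384-bit vector length gives a 6-byte predicate; hence the
 * split 4-byte + 2-byte little-endian load recombined with a deposit.
 */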
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}