/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(DisasContext *s, int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

/* See e.g. LSR (immediate, predicated). */
static int tszimm_shr(DisasContext *s, int x)
{
    return (16 << tszimm_esz(s, x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(DisasContext *s, int x)
{
    return x - (8 << tszimm_esz(s, x));
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(DisasContext *s, int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(DisasContext *s, int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(DisasContext *s, int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
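
/*
 * Worked example of the tszimm encoding: for a .s (32-bit) element,
 * tsz:imm3 has tsz == 0b0100, so x >> 3 == 4 and tszimm_esz returns
 * 31 - clz32(4) == 2.  An all-zero tsz gives 31 - clz32(0) == -1,
 * the unallocated marker that callers check via a->esz < 0.
 */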
/*
 * Include the generated decoder.
 */

#include "decode-sve.c.inc"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Invoke an out-of-line helper on 2 Zregs. */
static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
                            int rd, int rn, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                             int rd, int rn, int data,
                             ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
                                 arg_rr_esz *a, int data)
{
    return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
                            a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 3 Zregs. */
static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int rm, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rrr_esz *a, int data)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
}

/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int rm,
                              int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rrr_esz *a, int data)
{
    return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
                             a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

/* Invoke an out-of-line helper on 4 Zregs. */
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int ra, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrrr_esz *a, int data)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
}

static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rrxr_esz *a)
{
    return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
}
/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                              int rd, int rn, int rm, int ra,
                              int data, TCGv_ptr ptr)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           ptr, vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int ra,
                               int data, ARMFPStatusFlavour flavour)
{
    TCGv_ptr status = fpstatus_ptr(flavour);
    bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
    tcg_temp_free_ptr(status);
    return ret;
}

/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
                                int rd, int rn, int rm, int ra, int pg,
                                int data, ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vec_full_reg_offset(s, ra),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);

        tcg_temp_free_ptr(status);
    }
    return true;
}

/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
                             int rd, int rn, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
                                 arg_rpr_esz *a, int data)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
}

static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
                                  arg_rpri_esz *a)
{
    return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
}

static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                              int rd, int rn, int pg, int data,
                              ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
                                  arg_rpr_esz *a, int data,
                                  ARMFPStatusFlavour flavour)
{
    return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
                              int rd, int rn, int rm, int pg, int data)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
                                  arg_rprr_esz *a, int data)
{
    return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
}

/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                               int rd, int rn, int rm, int pg, int data,
                               ARMFPStatusFlavour flavour)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = fpstatus_ptr(flavour);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           pred_full_reg_offset(s, pg),
                           status, vsz, vsz, data, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
                                   arg_rprr_esz *a)
{
    return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}
/* Invoke a vector expander on two Zregs and an immediate. */
static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                            int esz, int rd, int rn, uint64_t imm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), imm, vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
                                arg_rri_esz *a)
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
}

/* Invoke a vector expander on three Zregs. */
static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int esz, int rd, int rn, int rm)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
                                arg_rrr_esz *a)
{
    return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
}

/* Invoke a vector expander on four Zregs. */
static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                                 arg_rrrr_esz *a)
{
    if (gvec_fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn),
                vec_full_reg_offset(s, a->rm),
                vec_full_reg_offset(s, a->ra), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on three Pregs. */
static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(MO_64, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
                         pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}
/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);

    gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
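
/*
 * Note on the flag encoding consumed by do_pred_flags: the predtest
 * helpers return N in bit 31 and C in bit 0, while bit 1 feeds cpu_ZF
 * directly under QEMU's convention that Z is set iff cpu_ZF == 0.
 * V is always zero for PredTest.
 */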
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[5] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull,
    0x0001000100010001ull,
};
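
/*
 * Each predicate bit governs one byte of vector data, so wider elements
 * own several bits of which only the lowest is significant: e.g.
 * pred_esz_masks[MO_16] keeps every second bit and pred_esz_masks[MO_64]
 * every eighth.
 */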
static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
{
    unallocated_encoding(s);
    return true;
}

/*
 *** SVE Logical - Unpredicated Group
 */

TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)

static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_8, 0xff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 8 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();
    uint64_t mask = dup_const(MO_16, 0xffff >> sh);

    tcg_gen_xor_i64(t, n, m);
    tcg_gen_shri_i64(d, t, sh);
    tcg_gen_shli_i64(t, t, 16 - sh);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(t, t, ~mask);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
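
/*
 * Note that the 8- and 16-bit expansions above synthesize a per-lane
 * rotate from two shifts plus masking, since TCG has no sub-word rotate
 * on i64; the 32/64-bit lanes and the vector path use the native rotri
 * operations directly.
 */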
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}

static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
                     vec_full_reg_offset(s, a->rn),
                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
    }
    return true;
}
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)

static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)

static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)

static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)

static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       =         | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)

static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
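
/*
 * Summary of the bitsel family implemented above, with Zk as the
 * selector: BSL computes (n & k) | (m & ~k); BSL1N is the same with
 * n inverted, BSL2N with m inverted, and NBSL inverts the result.
 */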
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}

#define DO_ZPZZ(NAME, FEAT, name) \
    static gen_helper_gvec_4 * const name##_zpzz_fns[4] = {               \
        gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h,           \
        gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz,                         \
               name##_zpzz_fns[a->esz], a, 0)
DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)

DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)

DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)

DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)

DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)

static gen_helper_gvec_4 * const sdiv_fns[4] = {
    NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
};
TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const udiv_fns[4] = {
    NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
};
TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)

TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

#define DO_ZPZ(NAME, FEAT, name) \
    static gen_helper_gvec_3 * const name##_fns[4] = {              \
        gen_helper_##name##_b, gen_helper_##name##_h,               \
        gen_helper_##name##_s, gen_helper_##name##_d,               \
    };                                                              \
    TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)

DO_ZPZ(CLS, aa64_sve, sve_cls)
DO_ZPZ(CLZ, aa64_sve, sve_clz)
DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
DO_ZPZ(CNOT, aa64_sve, sve_cnot)
DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)

static gen_helper_gvec_3 * const fabs_fns[4] = {
    NULL,                  gen_helper_sve_fabs_h,
    gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
};
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const fneg_fns[4] = {
    NULL,                  gen_helper_sve_fneg_h,
    gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
};
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxtb_fns[4] = {
    NULL,                  gen_helper_sve_sxtb_h,
    gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
};
TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxtb_fns[4] = {
    NULL,                  gen_helper_sve_uxtb_h,
    gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
};
TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sxth_fns[4] = {
    NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
};
TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const uxth_fns[4] = {
    NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
};
TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)

TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_gvec_reduc * const name##_fns[4] = {               \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static gen_helper_gvec_reduc * const saddv_fns[4] = {
    gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
    gen_helper_sve_saddv_s, NULL
};
TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
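
/*
 * Note that each reduction runs over the whole vector out of line and
 * returns the scalar in a TCGv_i64; write_fp_dreg then places it in
 * the low 64 bits of Vd and zeroes the rest of the register.
 */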
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/*
 * Copy Zn into Zd, storing zeros into inactive elements.
 * If invert, store zeros into the active elements.
 */
static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
                        int esz, bool invert)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
}

static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
                          gen_helper_gvec_3 * const fns[4])
{
    int max;

    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }

    /*
     * Shift by element size is architecturally valid.
     * For arithmetic right-shift, it's the same as by one less.
     * For logical shifts and ASRD, it is a zeroing operation.
     */
    max = 8 << a->esz;
    if (a->imm >= max) {
        if (asr) {
            a->imm = max - 1;
        } else {
            return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
        }
    }
    return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
}
static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
    gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
    gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
};
TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)

static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
    gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
    gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
};
TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)

static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
    gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
    gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
};
TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)

static gen_helper_gvec_3 * const asrd_fns[4] = {
    gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
    gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
};
TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)

static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
    gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
    gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
};
TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
    gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
    gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
};
TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)

static gen_helper_gvec_3 * const srshr_fns[4] = {
    gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
    gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
};
TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : srshr_fns[a->esz], a)

static gen_helper_gvec_3 * const urshr_fns[4] = {
    gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
    gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
};
TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : urshr_fns[a->esz], a)

static gen_helper_gvec_3 * const sqshlu_fns[4] = {
    gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
    gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
};
TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
           a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
    static gen_helper_gvec_4 * const name##_zpzw_fns[4] = {              \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,  \
        gen_helper_sve_##name##_zpzw_s, NULL                             \
    };                                                                   \
    TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz,             \
               name##_zpzw_fns[a->esz], a, 0)

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)

#define DO_ZZW(NAME, name) \
    static gen_helper_gvec_3 * const name##_zzw_fns[4] = {               \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,    \
        gen_helper_sve_##name##_zzw_s, NULL                              \
    };                                                                   \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz,                     \
               name##_zzw_fns[a->esz], a, 0)

DO_ZZW(ASR_zzw, asr)
DO_ZZW(LSR_zzw, lsr)
DO_ZZW(LSL_zzw, lsl)

#undef DO_ZZW
/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

static gen_helper_gvec_5 * const mla_fns[4] = {
    gen_helper_sve_mla_b, gen_helper_sve_mla_h,
    gen_helper_sve_mla_s, gen_helper_sve_mla_d,
};
TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])

static gen_helper_gvec_5 * const mls_fns[4] = {
    gen_helper_sve_mls_b, gen_helper_sve_mls_h,
    gen_helper_sve_mls_s, gen_helper_sve_mls_d,
};
TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
/*
 *** SVE Index Generation Group
 */

static bool do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz;
    TCGv_i32 desc;
    TCGv_ptr t_zd;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    return true;
}

TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
           tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
           cpu_reg(s, a->rn), cpu_reg(s, a->rm))
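
/*
 * INDEX writes start + i * incr into element i.  Only the .d form
 * needs the full 64-bit operands; the narrower forms truncate both
 * operands to 32 bits, which is enough since no element there is
 * wider than 32 bits.
 */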
/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
    }
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}

static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
{
    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (sme_enabled_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
    }
    return true;
}
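
/*
 * Example of the scaling above: with a 256-bit vector length,
 * vec_full_reg_size is 32 and pred_full_reg_size is 4, so
 * ADDVL Xd, Xn, #2 adds 64 to Xn while ADDPL Xd, Xn, #2 adds 8.
 */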
/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}

TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static gen_helper_gvec_2 * const fexpa_fns[4] = {
    NULL,                   gen_helper_sve_fexpa_h,
    gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
                        fexpa_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const ftssel_fns[4] = {
    NULL,                    gen_helper_sve_ftssel_h,
    gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
                        ftssel_fns[a->esz], a, 0)
/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s) {
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                return do_mov_p(s, a->rd, a->rn);
            }
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
        } else if (a->pg == a->rn || a->pg == a->rm) {
            return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
        }
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
    if (!a->s && a->pg == a->rm) {
        return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return do_pppp_flags(s, a, &op);
}
/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
? bound
: 0;
1745 /* This handles all of the predicate initialization instructions,
1746 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1747 * so that decode_pred_count returns 0. For SETFFR, we will have
1748 * set RD == 16 == FFR.
1750 static bool do_predset(DisasContext
*s
, int esz
, int rd
, int pat
, bool setflag
)
1752 if (!sve_access_check(s
)) {
1756 unsigned fullsz
= vec_full_reg_size(s
);
1757 unsigned ofs
= pred_full_reg_offset(s
, rd
);
1758 unsigned numelem
, setsz
, i
;
1759 uint64_t word
, lastword
;
1762 numelem
= decode_pred_count(fullsz
, pat
, esz
);
1764 /* Determine what we must store into each bit, and how many. */
1766 lastword
= word
= 0;
1769 setsz
= numelem
<< esz
;
1770 lastword
= word
= pred_esz_masks
[esz
];
1772 lastword
&= MAKE_64BIT_MASK(0, setsz
% 64);
1776 t
= tcg_temp_new_i64();
1778 tcg_gen_movi_i64(t
, lastword
);
1779 tcg_gen_st_i64(t
, cpu_env
, ofs
);
1783 if (word
== lastword
) {
1784 unsigned maxsz
= size_for_gvec(fullsz
/ 8);
1785 unsigned oprsz
= size_for_gvec(setsz
/ 8);
1787 if (oprsz
* 8 == setsz
) {
1788 tcg_gen_gvec_dup_imm(MO_64
, ofs
, oprsz
, maxsz
, word
);
1796 tcg_gen_movi_i64(t
, word
);
1797 for (i
= 0; i
< QEMU_ALIGN_DOWN(setsz
, 8); i
+= 8) {
1798 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1800 if (lastword
!= word
) {
1801 tcg_gen_movi_i64(t
, lastword
);
1802 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1806 tcg_gen_movi_i64(t
, 0);
1807 for (; i
< fullsz
; i
+= 8) {
1808 tcg_gen_st_i64(t
, cpu_env
, ofs
+ i
);
1813 tcg_temp_free_i64(t
);
1817 tcg_gen_movi_i32(cpu_NF
, -(word
!= 0));
1818 tcg_gen_movi_i32(cpu_CF
, word
== 0);
1819 tcg_gen_movi_i32(cpu_VF
, 0);
1820 tcg_gen_mov_i32(cpu_ZF
, cpu_NF
);
1825 TRANS_FEAT(PTRUE
, aa64_sve
, do_predset
, a
->esz
, a
->rd
, a
->pat
, a
->s
)
1827 /* Note pat == 31 is #all, to set all elements. */
1828 TRANS_FEAT_NONSTREAMING(SETFFR
, aa64_sve
,
1829 do_predset
, 0, FFR_PRED_NUM
, 31, false)
1831 /* Note pat == 32 is #unimp, to set no elements. */
1832 TRANS_FEAT(PFALSE
, aa64_sve
, do_predset
, 0, a
->rd
, 32, false)
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };

    s->is_nonstreaming = true;
    return trans_AND_pppp(s, &alt_a);
}

TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_temp_new_i32();

    gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
    }
}
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
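
/*
 * The signed paths above use the usual two's-complement test:
 * subtraction overflows iff the operands differ in sign and the result
 * differs in sign from the minuend; addition overflows iff the operands
 * agree in sign and the result does not.  Either condition lands in the
 * sign bit of t0, which steers the movcond between the computed value
 * and the INT64_MIN/INT64_MAX bound.
 */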
/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
}
static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
    }
    return true;
}
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              tcg_constant_i64(a->d ? -inc : inc),
                              fullsz, fullsz);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                              tcg_constant_i64(inc), a->u, a->d);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
/*
 *** SVE Bitwise Immediate Group
 */
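/*
 * The immediate is the 13-bit logical-immediate encoding shared with
 * the base A64 ISA; logic_imm_decode_wmask() expands it to a 64-bit
 * mask and rejects unallocated encodings.
 */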
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
}

TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)

static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            tcg_constant_i64(a->imm),
                            vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
/*
 *** SVE Permute Extract Group
 */
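/*
 * EXT extracts vsz bytes from the concatenation Zm:Zn, starting at
 * byte imm of Zn.  E.g. with a 32-byte vector and imm = 5, bytes
 * 5..31 of Zn are followed by bytes 0..4 of Zm.
 */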
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static gen_helper_gvec_2 * const rev_fns[4] = {
    gen_helper_sve_rev_b, gen_helper_sve_rev_h,
    gen_helper_sve_rev_s, gen_helper_sve_rev_d
};
TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)

static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
    gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
    gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
};
TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
    gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
    gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
};
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
           a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)

static gen_helper_gvec_3 * const tbx_fns[4] = {
    gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
    gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)

static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    uint32_t desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));

    fn(t_d, t_n, t_m, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    fn(t_d, t_n, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)

TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)

/*
 *** SVE Permute - Interleaving Group
 */

static gen_helper_gvec_3 * const zip_fns[4] = {
    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
};
TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, 0)
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)

TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a, 0)
TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_zip_q, a,
           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           uzp_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 0)
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_uzp_q, a, 16)

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 0)
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
           trn_fns[a->esz], a, 1 << a->esz)

TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 0)
TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
           gen_helper_sve2_trn_q, a, 16)

/*
 *** SVE Permute Vector - Predicated Group
 */

static gen_helper_gvec_3 * const compact_fns[4] = {
    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
                        compact_fns[a->esz], a, 0)
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));

    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz);
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector. */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS. */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}

/* Load an unsigned element of ESZ from RM[LAST]. */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#if HOST_BIG_ENDIAN
    /* Adjust for element ordering.  See vec_reg_offset. */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}

/* Compute CLAST for a Zreg. */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move. */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}

TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)

/* Compute CLAST for a scalar. */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing. */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
                        ele, reg_val);

    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}

/* Compute CLAST for a Vreg. */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)

/* Compute CLAST for a Xreg. */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)

/* Compute LAST for a scalar. */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}

/* Compute LAST for a Vreg. */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)

/* Compute LAST for a Xreg. */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)

static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static gen_helper_gvec_3 * const revb_fns[4] = {
    NULL, gen_helper_sve_revb_h,
    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
};
TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
};
TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)

TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)

TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)

TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)

TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
/*
 *** SVE Integer Compare - Vectors Group
 */
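/*
 * The compare helpers return the NZCV result of the predicate test
 * as a 32-bit value; do_pred_flags() then unpacks it into the
 * cpu_NF/ZF/CF/VF flag fields.
 */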
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZZ(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {       \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzz_fns[a->esz])

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

#define DO_PPZW(NAME, name) \
    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {       \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
        gen_helper_sve_##name##_ppzw_s, NULL                            \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                    \
               a, name##_ppzw_fns[a->esz])

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW

/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_temp_new_i32();
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}

#define DO_PPZI(NAME, name) \
    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {       \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
    };                                                                  \
    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                 \
               name##_ppzi_fns[a->esz])

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI

/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, m, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, m, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    return true;
}

static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc. */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        TCGv_i32 t = tcg_temp_new_i32();
        fn_s(t, d, n, g, desc);
        do_pred_flags(t);
        tcg_temp_free_i32(t);
    } else {
        fn(d, n, g, desc);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    return true;
}

TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)

TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)

TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)

TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
/*
 *** SVE Predicate Count Group
 */
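/*
 * do_cntp() has two paths: predicates no wider than 8 bytes are
 * counted inline with an AND plus ctpop, while larger predicates
 * go out of line to the sve_cntp helper.
 */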
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));

        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
    }
}

static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}

static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}

static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}

static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}

/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF. */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31. */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
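/*
 * WHILE* is implemented by computing how many iterations of the
 * pseudocode increment-and-compare loop would succeed, then handing
 * that count to a helper which materializes the predicate.  E.g.
 * WHILELT with op0 = 5 and op1 = 8 gives op1 - op0 = 3 true
 * iterations, clamped to the number of elements in the vector.
 */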
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (a->lt
        ? !dc_isar_feature(aa64_sve, s)
        : !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_constant_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration. */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(t0, t0, tmax);

    /* Set the count to zero if the condition is false. */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits. */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}
static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
{
    TCGv_i64 op0, op1, diff, t1, tmax;
    TCGv_i32 t2;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    tmax = tcg_constant_i64(vsz);
    diff = tcg_temp_new_i64();

    if (a->rw) {
        /* WHILERW */
        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
        t1 = tcg_temp_new_i64();
        tcg_gen_sub_i64(diff, op0, op1);
        tcg_gen_sub_i64(t1, op1, op0);
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
        tcg_temp_free_i64(t1);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op1 == op0, diff == 0, and the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
    } else {
        /* WHILEWR */
        tcg_gen_sub_i64(diff, op1, op0);
        /* Round down to a multiple of ESIZE. */
        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
        /* If op0 >= op1, diff <= 0, the condition is always true. */
        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
    }

    /* Bound to the maximum. */
    tcg_gen_umin_i64(diff, diff, tmax);

    /* Since we're bounded, pass as a 32-bit type. */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, diff);
    tcg_temp_free_i64(diff);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    return true;
}

/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */

static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate. */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}

static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}

TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)

static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* Subtract of an immediate is implemented as add of the negation. */
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
    }
    return true;
}

TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)

static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (sve_access_check(s)) {
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
    }
    return true;
}

TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)

static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
    }
    return true;
}
#define DO_ZZI(NAME, name) \
    static gen_helper_gvec_2i * const name##i_fns[4] = {          \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,   \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,   \
    };                                                            \
    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
static gen_helper_gvec_4 * const dot_fns[2][2] = {
    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)

/*
 * SVE Multiply - Indexed
 */

TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_h, a)
TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_b, a)
TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_udot_idx_h, a)

TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sudot_idx_b, a)
TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_usdot_idx_b, a)

#define DO_SVE2_RRX(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, a->index)

DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX

#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)

DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB

#define DO_SVE2_RRXR(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)

DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR

#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)

DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB

#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT

/*
 *** SVE Floating Point Multiply-Add Indexed Group
 */

static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    static gen_helper_gvec_4_ptr * const fns[4] = {
        NULL,
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };
    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
}

TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)

/*
 *** SVE Floating Point Multiply Indexed Group
 */

static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
    NULL, gen_helper_gvec_fmul_idx_h,
    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
 *** SVE Floating Point Fast Reduction Group
 */
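/*
 * The reduction helpers expect a power-of-2 sized workspace, so the
 * descriptor passes pow2ceil(vsz) as simd_data alongside the real
 * vector length.
 */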
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz, p2vsz;
    TCGv_i32 t_desc;
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    p2vsz = pow2ceil(vsz);
    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
    static gen_helper_fp_reduce * const name##_fns[4] = {         \
        NULL, gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)

#undef DO_VPZ

/*
 *** SVE Floating Point Unary Operations - Unpredicated Group
 */

static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
    NULL, gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
};
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)

static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
    NULL, gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
};
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)

/*
 *** SVE Floating Point Compare with Zero Group
 */

static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status =
            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}

#define DO_PPZ(NAME, name) \
    static gen_helper_gvec_3_ptr * const name##_fns[] = {         \
        NULL, gen_helper_sve_##name##_h,                          \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,     \
    };                                                            \
    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ

/*
 *** SVE floating-point trig multiply-add coefficient
 */

static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
    NULL, gen_helper_sve_ftmad_h,
    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                        ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                        a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
 *** SVE Floating Point Accumulating Reduction Group
 */
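/*
 * FADDA is a strictly ordered reduction, so it is implemented
 * entirely out of line; it is also marked non-streaming.
 */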
3913 static bool trans_FADDA(DisasContext
*s
, arg_rprr_esz
*a
)
3915 typedef void fadda_fn(TCGv_i64
, TCGv_i64
, TCGv_ptr
,
3916 TCGv_ptr
, TCGv_ptr
, TCGv_i32
);
3917 static fadda_fn
* const fns
[3] = {
3918 gen_helper_sve_fadda_h
,
3919 gen_helper_sve_fadda_s
,
3920 gen_helper_sve_fadda_d
,
3922 unsigned vsz
= vec_full_reg_size(s
);
3923 TCGv_ptr t_rm
, t_pg
, t_fpst
;
3927 if (a
->esz
== 0 || !dc_isar_feature(aa64_sve
, s
)) {
3930 s
->is_nonstreaming
= true;
3931 if (!sve_access_check(s
)) {
3935 t_val
= load_esz(cpu_env
, vec_reg_offset(s
, a
->rn
, 0, a
->esz
), a
->esz
);
3936 t_rm
= tcg_temp_new_ptr();
3937 t_pg
= tcg_temp_new_ptr();
3938 tcg_gen_addi_ptr(t_rm
, cpu_env
, vec_full_reg_offset(s
, a
->rm
));
3939 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, a
->pg
));
3940 t_fpst
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
3941 t_desc
= tcg_constant_i32(simd_desc(vsz
, vsz
, 0));
3943 fns
[a
->esz
- 1](t_val
, t_val
, t_rm
, t_pg
, t_fpst
, t_desc
);
3945 tcg_temp_free_ptr(t_fpst
);
3946 tcg_temp_free_ptr(t_pg
);
3947 tcg_temp_free_ptr(t_rm
);
3949 write_fp_dreg(s
, a
->rd
, t_val
);
3950 tcg_temp_free_i64(t_val
);
3955 *** SVE Floating Point Arithmetic - Unpredicated Group
3958 #define DO_FP3(NAME, name) \
3959 static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
3960 NULL, gen_helper_gvec_##name##_h, \
3961 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3963 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
3965 DO_FP3(FADD_zzz
, fadd
)
3966 DO_FP3(FSUB_zzz
, fsub
)
3967 DO_FP3(FMUL_zzz
, fmul
)
3968 DO_FP3(FRECPS
, recps
)
3969 DO_FP3(FRSQRTS
, rsqrts
)
3973 static gen_helper_gvec_3_ptr
* const ftsmul_fns
[4] = {
3974 NULL
, gen_helper_gvec_ftsmul_h
,
3975 gen_helper_gvec_ftsmul_s
, gen_helper_gvec_ftsmul_d
3977 TRANS_FEAT_NONSTREAMING(FTSMUL
, aa64_sve
, gen_gvec_fpst_arg_zzz
,
3978 ftsmul_fns
[a
->esz
], a
, 0)
3981 *** SVE Floating Point Arithmetic - Predicated Group
3984 #define DO_ZPZZ_FP(NAME, FEAT, name) \
3985 static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
3986 NULL, gen_helper_##name##_h, \
3987 gen_helper_##name##_s, gen_helper_##name##_d \
3989 TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
3991 DO_ZPZZ_FP(FADD_zpzz
, aa64_sve
, sve_fadd
)
3992 DO_ZPZZ_FP(FSUB_zpzz
, aa64_sve
, sve_fsub
)
3993 DO_ZPZZ_FP(FMUL_zpzz
, aa64_sve
, sve_fmul
)
3994 DO_ZPZZ_FP(FMIN_zpzz
, aa64_sve
, sve_fmin
)
3995 DO_ZPZZ_FP(FMAX_zpzz
, aa64_sve
, sve_fmax
)
3996 DO_ZPZZ_FP(FMINNM_zpzz
, aa64_sve
, sve_fminnum
)
3997 DO_ZPZZ_FP(FMAXNM_zpzz
, aa64_sve
, sve_fmaxnum
)
3998 DO_ZPZZ_FP(FABD
, aa64_sve
, sve_fabd
)
3999 DO_ZPZZ_FP(FSCALE
, aa64_sve
, sve_fscalbn
)
4000 DO_ZPZZ_FP(FDIV
, aa64_sve
, sve_fdiv
)
4001 DO_ZPZZ_FP(FMULX
, aa64_sve
, sve_fmulx
)
4003 typedef void gen_helper_sve_fp2scalar(TCGv_ptr
, TCGv_ptr
, TCGv_ptr
,
4004 TCGv_i64
, TCGv_ptr
, TCGv_i32
);
4006 static void do_fp_scalar(DisasContext
*s
, int zd
, int zn
, int pg
, bool is_fp16
,
4007 TCGv_i64 scalar
, gen_helper_sve_fp2scalar
*fn
)
4009 unsigned vsz
= vec_full_reg_size(s
);
4010 TCGv_ptr t_zd
, t_zn
, t_pg
, status
;
4013 t_zd
= tcg_temp_new_ptr();
4014 t_zn
= tcg_temp_new_ptr();
4015 t_pg
= tcg_temp_new_ptr();
4016 tcg_gen_addi_ptr(t_zd
, cpu_env
, vec_full_reg_offset(s
, zd
));
4017 tcg_gen_addi_ptr(t_zn
, cpu_env
, vec_full_reg_offset(s
, zn
));
4018 tcg_gen_addi_ptr(t_pg
, cpu_env
, pred_full_reg_offset(s
, pg
));
4020 status
= fpstatus_ptr(is_fp16
? FPST_FPCR_F16
: FPST_FPCR
);
4021 desc
= tcg_constant_i32(simd_desc(vsz
, vsz
, 0));
4022 fn(t_zd
, t_zn
, t_pg
, scalar
, status
, desc
);
4024 tcg_temp_free_ptr(status
);
4025 tcg_temp_free_ptr(t_pg
);
4026 tcg_temp_free_ptr(t_zn
);
4027 tcg_temp_free_ptr(t_zd
);
4030 static bool do_fp_imm(DisasContext
*s
, arg_rpri_esz
*a
, uint64_t imm
,
4031 gen_helper_sve_fp2scalar
*fn
)
4036 if (sve_access_check(s
)) {
4037 do_fp_scalar(s
, a
->rd
, a
->rn
, a
->pg
, a
->esz
== MO_16
,
4038 tcg_constant_i64(imm
), fn
);
4043 #define DO_FP_IMM(NAME, name, const0, const1) \
4044 static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
4045 NULL, gen_helper_sve_##name##_h, \
4046 gen_helper_sve_##name##_s, \
4047 gen_helper_sve_##name##_d \
4049 static uint64_t const name##_const[4][2] = { \
4051 { float16_##const0, float16_##const1 }, \
4052 { float32_##const0, float32_##const1 }, \
4053 { float64_##const0, float64_##const1 }, \
4055 TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
4056 name##_const[a->esz][a->imm], name##_fns[a->esz])
4058 DO_FP_IMM(FADD
, fadds
, half
, one
)
4059 DO_FP_IMM(FSUB
, fsubs
, half
, one
)
4060 DO_FP_IMM(FMUL
, fmuls
, half
, two
)
4061 DO_FP_IMM(FSUBR
, fsubrs
, half
, one
)
4062 DO_FP_IMM(FMAXNM
, fmaxnms
, zero
, one
)
4063 DO_FP_IMM(FMINNM
, fminnms
, zero
, one
)
4064 DO_FP_IMM(FMAX
, fmaxs
, zero
, one
)
4065 DO_FP_IMM(FMIN
, fmins
, zero
, one
)
4069 static bool do_fp_cmp(DisasContext
*s
, arg_rprr_esz
*a
,
4070 gen_helper_gvec_4_ptr
*fn
)
4075 if (sve_access_check(s
)) {
4076 unsigned vsz
= vec_full_reg_size(s
);
4077 TCGv_ptr status
= fpstatus_ptr(a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
);
4078 tcg_gen_gvec_4_ptr(pred_full_reg_offset(s
, a
->rd
),
4079 vec_full_reg_offset(s
, a
->rn
),
4080 vec_full_reg_offset(s
, a
->rm
),
4081 pred_full_reg_offset(s
, a
->pg
),
4082 status
, vsz
, vsz
, 0, fn
);
4083 tcg_temp_free_ptr(status
);
4088 #define DO_FPCMP(NAME, name) \
4089 static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
4090 NULL, gen_helper_sve_##name##_h, \
4091 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4093 TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
4095 DO_FPCMP(FCMGE
, fcmge
)
4096 DO_FPCMP(FCMGT
, fcmgt
)
4097 DO_FPCMP(FCMEQ
, fcmeq
)
4098 DO_FPCMP(FCMNE
, fcmne
)
4099 DO_FPCMP(FCMUO
, fcmuo
)
4100 DO_FPCMP(FACGE
, facge
)
4101 DO_FPCMP(FACGT
, facgt
)
4105 static gen_helper_gvec_4_ptr
* const fcadd_fns
[] = {
4106 NULL
, gen_helper_sve_fcadd_h
,
4107 gen_helper_sve_fcadd_s
, gen_helper_sve_fcadd_d
,
4109 TRANS_FEAT(FCADD
, aa64_sve
, gen_gvec_fpst_zzzp
, fcadd_fns
[a
->esz
],
4110 a
->rd
, a
->rn
, a
->rm
, a
->pg
, a
->rot
,
4111 a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
)
4113 #define DO_FMLA(NAME, name) \
4114 static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
4115 NULL, gen_helper_sve_##name##_h, \
4116 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
4118 TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
4119 a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
4120 a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
4122 DO_FMLA(FMLA_zpzzz
, fmla_zpzzz
)
4123 DO_FMLA(FMLS_zpzzz
, fmls_zpzzz
)
4124 DO_FMLA(FNMLA_zpzzz
, fnmla_zpzzz
)
4125 DO_FMLA(FNMLS_zpzzz
, fnmls_zpzzz
)
4129 static gen_helper_gvec_5_ptr
* const fcmla_fns
[4] = {
4130 NULL
, gen_helper_sve_fcmla_zpzzz_h
,
4131 gen_helper_sve_fcmla_zpzzz_s
, gen_helper_sve_fcmla_zpzzz_d
,
4133 TRANS_FEAT(FCMLA_zpzzz
, aa64_sve
, gen_gvec_fpst_zzzzp
, fcmla_fns
[a
->esz
],
4134 a
->rd
, a
->rn
, a
->rm
, a
->ra
, a
->pg
, a
->rot
,
4135 a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
)
4137 static gen_helper_gvec_4_ptr
* const fcmla_idx_fns
[4] = {
4138 NULL
, gen_helper_gvec_fcmlah_idx
, gen_helper_gvec_fcmlas_idx
, NULL
4140 TRANS_FEAT(FCMLA_zzxz
, aa64_sve
, gen_gvec_fpst_zzzz
, fcmla_idx_fns
[a
->esz
],
4141 a
->rd
, a
->rn
, a
->rm
, a
->ra
, a
->index
* 4 + a
->rot
,
4142 a
->esz
== MO_16
? FPST_FPCR_F16
: FPST_FPCR
)
/*
 *** SVE Floating Point Unary Operations Predicated Group
 */

TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)

TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)

static gen_helper_gvec_3_ptr * const frint_fns[] = {
    NULL,
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const frintx_fns[] = {
    NULL,
    gen_helper_sve_frintx_h,
    gen_helper_sve_frintx_s,
    gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz;
    TCGv_i32 tmode;
    TCGv_ptr status;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tmode = tcg_const_i32(mode);
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    gen_helper_set_rmode(tmode, tmode, status);

    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);

    gen_helper_set_rmode(tmode, tmode, status);
    tcg_temp_free_i32(tmode);
    tcg_temp_free_ptr(status);
    return true;
}

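/*
 * Note on the pattern above: gen_helper_set_rmode swaps in the new
 * rounding mode and returns the previous one in its destination, so
 * calling it a second time with the same temporary restores the
 * original FPCR rounding mode around the vector operation.
 */
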
TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
           float_round_nearest_even, frint_fns[a->esz])
TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
           float_round_up, frint_fns[a->esz])
TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
           float_round_down, frint_fns[a->esz])
TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
           float_round_to_zero, frint_fns[a->esz])
TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
           float_round_ties_away, frint_fns[a->esz])

static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
    NULL, gen_helper_sve_frecpx_h,
    gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
};
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
    NULL, gen_helper_sve_fsqrt_h,
    gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)

TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)

TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */

/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, base, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            assert(len_remain == 0);
        }
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, base, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}

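/*
 * Worked example for the unrolling heuristic above: an 18-byte
 * predicate transfer has len_align == 16, len_remain == 2 and
 * nparts == 2 + ctpop8(2) == 3, so the unrolled path emits two
 * 8-byte loads plus the 2-byte tail handled by the switch.
 */
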
/* Similarly for stores. */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
                 int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, base, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        if (base != cpu_env) {
            TCGv_ptr b = tcg_temp_local_new_ptr();
            tcg_gen_mov_ptr(b, base);
            base = b;
        }

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, base, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);

        if (base != cpu_env) {
            tcg_temp_free_ptr(base);
            assert(len_remain == 0);
        }
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, base, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}

static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_zri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

static bool trans_STR_pri(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
    }
    return true;
}

/*
 *** SVE Memory - Contiguous Load Group
 */

/* The memory mode of the dtype.  */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};

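/*
 * Worked example: dtype 4 is LD1SW, so dtype_mop[4] == MO_SL (a
 * sign-extending 32-bit memory access), dtype_msz(4) == MO_32, and
 * dtype_esz[4] == 3, i.e. 64-bit vector elements.
 */
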
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_pg);
}

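/*
 * Layout sketch of the descriptor built above: the low bits carry the
 * usual simd_desc() encoding with the zt register number folded into
 * the data field; when MTE is active, the MTEDESC fields sit above
 * SVE_MTEDESC_SHIFT within that same data field.
 */
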
/* Indexed by [mte][be][dtype][nreg] */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};

static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];

    /*
     * While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}

static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1))
                         << dtype_msz(a->dtype));
        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
    }
    return true;
}

static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}

static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    int poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));

    tcg_temp_free_ptr(t_pg);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        int doff = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
    }
}

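/*
 * For example, with a 64-byte vector the dup_mem above (vece == 4,
 * i.e. 16-byte units) replicates the quadword just loaded at doff
 * into the remaining 48 bytes of the destination Zreg.
 */
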
static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        int msz = dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */
    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#if HOST_BIG_ENDIAN
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}

static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    if (a->rm == 31) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
        do_ldro(s, a->rd, a->pg, addr, a->dtype);
    }
    return true;
}

/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}

static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}

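/*
 * Note on the tables above: fn_single is indexed [mte][be][msz][esz],
 * and its NULL entries (msz > esz) should be unreachable because the
 * callers below reject encodings with a->msz > a->esz before getting
 * here.
 */
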
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->rm == 31 || a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    if (a->msz > a->esz) {
        return false;
    }
    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> a->esz;
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
                         (a->imm * elements * (a->nreg + 1)) << a->msz);
        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
    }
    return true;
}

/*
 *** SVE gather loads / scatter stores
 */

static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    int desc = 0;

    if (s->mte_active[0]) {
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
}

/* Indexed by [mte][be][ff][xs][u][msz].  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};

static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, false, fn);
    return true;
}

static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
    return true;
}

static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz + !a->u) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
        break;
    case MO_64:
        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
        break;
    }
    assert(fn != NULL);

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, false, fn);
    return true;
}

/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};

/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_le_zsu,
              gen_helper_sve_stsd_le_zsu,
              gen_helper_sve_stdd_le_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_le_zss,
              gen_helper_sve_stsd_le_zss,
              gen_helper_sve_stdd_le_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_le_zd,
              gen_helper_sve_stsd_le_zd,
              gen_helper_sve_stdd_le_zd, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu,
              gen_helper_sve_sthd_be_zsu,
              gen_helper_sve_stsd_be_zsu,
              gen_helper_sve_stdd_be_zsu, },
            { gen_helper_sve_stbd_zss,
              gen_helper_sve_sthd_be_zss,
              gen_helper_sve_stsd_be_zss,
              gen_helper_sve_stdd_be_zss, },
            { gen_helper_sve_stbd_zd,
              gen_helper_sve_sthd_be_zd,
              gen_helper_sve_stsd_be_zd,
              gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_le_zsu_mte,
              gen_helper_sve_stsd_le_zsu_mte,
              gen_helper_sve_stdd_le_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_le_zss_mte,
              gen_helper_sve_stsd_le_zss_mte,
              gen_helper_sve_stdd_le_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_le_zd_mte,
              gen_helper_sve_stsd_le_zd_mte,
              gen_helper_sve_stdd_le_zd_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbd_zsu_mte,
              gen_helper_sve_sthd_be_zsu_mte,
              gen_helper_sve_stsd_be_zsu_mte,
              gen_helper_sve_stdd_be_zsu_mte, },
            { gen_helper_sve_stbd_zss_mte,
              gen_helper_sve_sthd_be_zss_mte,
              gen_helper_sve_stsd_be_zss_mte,
              gen_helper_sve_stdd_be_zss_mte, },
            { gen_helper_sve_stbd_zd_mte,
              gen_helper_sve_sthd_be_zd_mte,
              gen_helper_sve_stsd_be_zd_mte,
              gen_helper_sve_stdd_be_zd_mte, } } },
};

static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }
    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
        break;
    default:
        g_assert_not_reached();
    }
    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
               cpu_reg_sp(s, a->rn), a->msz, true, fn);
    return true;
}

static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
    gen_helper_gvec_mem_scatter *fn = NULL;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    }
    assert(fn != NULL);

    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
     * by loading the immediate into the scalar parameter.
     */
    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
    return true;
}

static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    gen_helper_gvec_mem_scatter *fn;
    bool be = s->be_data == MO_BE;
    bool mte = s->mte_active[0];

    if (a->esz < a->msz) {
        return false;
    }
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    s->is_nonstreaming = true;
    if (!sve_access_check(s)) {
        return true;
    }

    switch (a->esz) {
    case MO_32:
        fn = scatter_store_fn32[mte][be][0][a->msz];
        break;
    case MO_64:
        fn = scatter_store_fn64[mte][be][2][a->msz];
        break;
    default:
        g_assert_not_reached();
    }

    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
               cpu_reg(s, a->rm), a->msz, true, fn);
    return true;
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
{
    if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    s->is_nonstreaming = true;
    (void)sve_access_check(s);
    return true;
}

6057 * TODO: The implementation so far could handle predicated merging movprfx.
6058 * The helper functions as written take an extra source register to
6059 * use in the operation, but the result is only written when predication
6060 * succeeds. For unpredicated movprfx, we need to rearrange the helpers
6061 * to allow the final write back to the destination to be unconditional.
6062 * For predicated zeroing movprfx, we need to rearrange the helpers to
6063 * allow the final write back to zero inactives.
6065 * In the meantime, just emit the moves.
6068 TRANS_FEAT(MOVPRFX
, aa64_sve
, do_mov_z
, a
->rd
, a
->rn
)
6069 TRANS_FEAT(MOVPRFX_m
, aa64_sve
, do_sel_z
, a
->rd
, a
->rn
, a
->rd
, a
->pg
, a
->esz
)
6070 TRANS_FEAT(MOVPRFX_z
, aa64_sve
, do_movz_zpz
, a
->rd
, a
->rn
, a
->pg
, a
->esz
, false)
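/*
 * Illustrative expansion under the current scheme: the pair
 * "movprfx z0, z1 ; add z0.b, p0/m, z0.b, z2.b" is translated as a
 * whole-vector move of z1 into z0 followed by the predicated add,
 * rather than being fused into a single constructive operation.
 */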
/*
 * SVE2 Integer Multiply - Unpredicated
 */

TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)

static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
    gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
    gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
};
TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
    gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
    gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
};
TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umulh_zzz_fns[a->esz], a, 0)

TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           gen_helper_gvec_pmul_b, a, 0)

static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
    gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
};
TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmulh_zzz_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
    gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
    gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
};
TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqrdmulh_zzz_fns[a->esz], a, 0)

/*
 * SVE2 Integer - Predicated
 */

static gen_helper_gvec_4 * const sadlp_fns[4] = {
    NULL,                          gen_helper_sve2_sadalp_zpzz_h,
    gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
};
TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           sadlp_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const uadlp_fns[4] = {
    NULL,                          gen_helper_sve2_uadalp_zpzz_h,
    gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
};
TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
           uadlp_fns[a->esz], a, 0)

/*
 * SVE2 integer unary operations (predicated)
 */

TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)

TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
           a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)

static gen_helper_gvec_3 * const sqabs_fns[4] = {
    gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
    gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
};
TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const sqneg_fns[4] = {
    gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
    gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
};
TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)

DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)

DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)

DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)

DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)

DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)

DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
/*
 * SVE2 Widening Integer Arithmetic
 */

static gen_helper_gvec_3 * const saddl_fns[4] = {
    NULL,                    gen_helper_sve2_saddl_h,
    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
};
TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 0)
TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 3)
TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           saddl_fns[a->esz], a, 2)

static gen_helper_gvec_3 * const ssubl_fns[4] = {
    NULL,                    gen_helper_sve2_ssubl_h,
    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
};
TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 0)
TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 3)
TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 2)
TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
           ssubl_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sabdl_fns[4] = {
    NULL,                    gen_helper_sve2_sabdl_h,
    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
};
TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 0)
TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           sabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uaddl_fns[4] = {
    NULL,                    gen_helper_sve2_uaddl_h,
    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
};
TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 0)
TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uaddl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const usubl_fns[4] = {
    NULL,                    gen_helper_sve2_usubl_h,
    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
};
TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 0)
TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           usubl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const uabdl_fns[4] = {
    NULL,                    gen_helper_sve2_uabdl_h,
    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
};
TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 0)
TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
           uabdl_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const sqdmull_fns[4] = {
    NULL,                          gen_helper_sve2_sqdmull_zzz_h,
    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
};
TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 0)
TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqdmull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const smull_fns[4] = {
    NULL,                        gen_helper_sve2_smull_zzz_h,
    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
};
TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 0)
TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           smull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const umull_fns[4] = {
    NULL,                        gen_helper_sve2_umull_zzz_h,
    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
};
TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 0)
TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
           umull_fns[a->esz], a, 3)

static gen_helper_gvec_3 * const eoril_fns[4] = {
    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
};
TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };

    if (a->esz == 0) {
        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
            return false;
        }
        s->is_nonstreaming = true;
    } else if (!dc_isar_feature(aa64_sve, s)) {
        return false;
    }
    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
}

TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
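/*
 * Dispatch sketch: esz == 0 selects gen_helper_gvec_pmull_q, the
 * 64x64 -> 128-bit form, which is gated on aa64_sve2_pmull128 above
 * and marked non-streaming; the NULL at esz == 2 reflects that no
 * 32-bit polynomial multiply is architected, so that encoding makes
 * the trans call fail as an unallocated encoding.
 */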
static gen_helper_gvec_3 * const saddw_fns[4] = {
    NULL,                    gen_helper_sve2_saddw_h,
    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
};
TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const ssubw_fns[4] = {
    NULL,                    gen_helper_sve2_ssubw_h,
    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
};
TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const uaddw_fns[4] = {
    NULL,                    gen_helper_sve2_uaddw_h,
    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
};
TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const usubw_fns[4] = {
    NULL,                    gen_helper_sve2_usubw_h,
    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
};
TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
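/*
 * Note on the shl == halfbits special case above: shifting the
 * sign-extended top half left by exactly halfbits puts the source
 * bits back in their original position with zeros below, so the
 * whole operation reduces to masking out the low half of each lane.
 */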
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
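/*
 * Worked example: USHLLT with vece = MO_16 (byte sources in halfword
 * lanes) and shift 3 gives halfbits = 8, mask = dup_const(0xff << 3)
 * and shift = 3 - 8 = -5, so each top byte moves right by 5 into
 * bits [10:3] of its lane and the mask clears everything else.
 */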
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}

static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}

static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
                       const GVecGen2i ops[3], bool sel)
{
    if (a->esz < 0 || a->esz > 2) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[a->esz]);
    }
    return true;
}
static const TCGOpcode sshll_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
};
static const GVecGen2i sshll_ops[3] = {
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_h,
      .vece = MO_16 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_s,
      .vece = MO_32 },
    { .fniv = gen_sshll_vec,
      .opt_opc = sshll_list,
      .fno = gen_helper_sve2_sshll_d,
      .vece = MO_64 },
};
TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)

static const TCGOpcode ushll_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
};
static const GVecGen2i ushll_ops[3] = {
    { .fni8 = gen_ushll16_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_h,
      .vece = MO_16 },
    { .fni8 = gen_ushll32_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_s,
      .vece = MO_32 },
    { .fni8 = gen_ushll64_i64,
      .fniv = gen_ushll_vec,
      .opt_opc = ushll_list,
      .fno = gen_helper_sve2_ushll_d,
      .vece = MO_64 },
};
TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
static gen_helper_gvec_3 * const bext_fns[4] = {
    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bext_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bdep_fns[4] = {
    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bdep_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const bgrp_fns[4] = {
    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
                        bgrp_fns[a->esz], a, 0)

static gen_helper_gvec_3 * const cadd_fns[4] = {
    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
};
TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 0)
TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           cadd_fns[a->esz], a, 1)

static gen_helper_gvec_3 * const sqcadd_fns[4] = {
    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
};
TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 0)
TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
           sqcadd_fns[a->esz], a, 1)
static gen_helper_gvec_4 * const sabal_fns[4] = {
    NULL,                    gen_helper_sve2_sabal_h,
    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
};
TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const uabal_fns[4] = {
    NULL,                    gen_helper_sve2_uabal_h,
    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
};
TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)

static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
}

TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
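/*
 * Concretely: esz values 0 and 1 select the 32-bit and 64-bit
 * ADCLB/ADCLT forms, while esz 2 and 3 are the same widths of
 * SBCLB/SBCLT; the subtract flag travels to the helper in bit 1 of
 * the data word and the bottom/top selection in bit 0.
 */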
TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)

TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
                              const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, &ops[a->esz]);
    }
    return true;
}

static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtnb_ops[3] = {
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnb_vec,
      .opt_opc = sqxtn_list,
      .fno = gen_helper_sve2_sqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
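/*
 * E.g. for the .h -> .b form, each 16-bit element is clamped to
 * [-0x80, 0x7f] (so 0x0123 saturates to 0x7f and 0xff00, i.e. -256,
 * to 0x80) and the final mask zeros the odd bytes that the SQXTNT
 * variant below would fill.
 */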
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtnt_ops[3] = {
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtnt_vec,
      .opt_opc = sqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)

static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnb_ops[3] = {
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnb_vec,
      .opt_opc = uqxtn_list,
      .fno = gen_helper_sve2_uqxtnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)

static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 uqxtnt_ops[3] = {
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqxtnt_vec,
      .opt_opc = uqxtn_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqxtnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)

static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunb_ops[3] = {
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunb_vec,
      .opt_opc = sqxtun_list,
      .fno = gen_helper_sve2_sqxtunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)

static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const GVecGen2 sqxtunt_ops[3] = {
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqxtunt_vec,
      .opt_opc = sqxtun_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqxtunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
                          const GVecGen2i ops[3])
{
    if (a->esz < 0 || a->esz > MO_32) {
        return false;
    }
    assert(a->imm > 0 && a->imm <= (8 << a->esz));
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, a->imm, &ops[a->esz]);
    }
    return true;
}

static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
static const GVecGen2i shrnb_ops[3] = {
    { .fni8 = gen_shrnb16_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnb32_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnb64_i64,
      .fniv = gen_shrnb_vec,
      .opt_opc = shrnb_vec_list,
      .fno = gen_helper_sve2_shrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
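/*
 * E.g. SHRNB z0.b, z1.h, #2: the fni8 path shifts the whole 64-bit
 * word right by 2 and then ANDs with dup_const(MO_16, 0xff); because
 * the shift is at most 8, the surviving low byte of each halfword
 * lane never receives bits from its neighbour.
 */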
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
static const GVecGen2i shrnt_ops[3] = {
    { .fni8 = gen_shrnt16_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_h,
      .vece = MO_16 },
    { .fni8 = gen_shrnt32_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_s,
      .vece = MO_32 },
    { .fni8 = gen_shrnt64_i64,
      .fniv = gen_shrnt_vec,
      .opt_opc = shrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_shrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)

static const GVecGen2i rshrnb_ops[3] = {
    { .fno = gen_helper_sve2_rshrnb_h },
    { .fno = gen_helper_sve2_rshrnb_s },
    { .fno = gen_helper_sve2_rshrnb_d },
};
TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)

static const GVecGen2i rshrnt_ops[3] = {
    { .fno = gen_helper_sve2_rshrnt_h },
    { .fno = gen_helper_sve2_rshrnt_s },
    { .fno = gen_helper_sve2_rshrnt_d },
};
TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunb_ops[3] = {
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunb_vec,
      .opt_opc = sqshrunb_vec_list,
      .fno = gen_helper_sve2_sqshrunb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)

static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrunt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i sqshrunt_ops[3] = {
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrunt_vec,
      .opt_opc = sqshrunt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrunt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)

static const GVecGen2i sqrshrunb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunb_h },
    { .fno = gen_helper_sve2_sqrshrunb_s },
    { .fno = gen_helper_sve2_sqrshrunb_d },
};
TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)

static const GVecGen2i sqrshrunt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrunt_h },
    { .fno = gen_helper_sve2_sqrshrunt_s },
    { .fno = gen_helper_sve2_sqrshrunt_d },
};
TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)

static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnb_vec_list[] = {
    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnb_ops[3] = {
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnb_vec,
      .opt_opc = sqshrnb_vec_list,
      .fno = gen_helper_sve2_sqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)

static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode sqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_sari_vec,
    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
};
static const GVecGen2i sqshrnt_ops[3] = {
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_sqshrnt_vec,
      .opt_opc = sqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_sqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)

static const GVecGen2i sqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnb_h },
    { .fno = gen_helper_sve2_sqrshrnb_s },
    { .fno = gen_helper_sve2_sqrshrnb_d },
};
TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)

static const GVecGen2i sqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_sqrshrnt_h },
    { .fno = gen_helper_sve2_sqrshrnt_s },
    { .fno = gen_helper_sve2_sqrshrnt_d },
};
TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)

static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnb_vec_list[] = {
    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnb_ops[3] = {
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnb_vec,
      .opt_opc = uqshrnb_vec_list,
      .fno = gen_helper_sve2_uqshrnb_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)

static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

static const TCGOpcode uqshrnt_vec_list[] = {
    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
};
static const GVecGen2i uqshrnt_ops[3] = {
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_h,
      .vece = MO_16 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_s,
      .vece = MO_32 },
    { .fniv = gen_uqshrnt_vec,
      .opt_opc = uqshrnt_vec_list,
      .load_dest = true,
      .fno = gen_helper_sve2_uqshrnt_d,
      .vece = MO_64 },
};
TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)

static const GVecGen2i uqrshrnb_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnb_h },
    { .fno = gen_helper_sve2_uqrshrnb_s },
    { .fno = gen_helper_sve2_uqrshrnb_d },
};
TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)

static const GVecGen2i uqrshrnt_ops[3] = {
    { .fno = gen_helper_sve2_uqrshrnt_h },
    { .fno = gen_helper_sve2_uqrshrnt_s },
    { .fno = gen_helper_sve2_uqrshrnt_d },
};
TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
               name##_fns[a->esz], a, 0)
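/*
 * For instance, DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb) below expands to
 * an addhnb_fns[] table of the _h/_s/_d helpers (NULL for the byte
 * slot) plus a TRANS_FEAT(ADDHNB, ...) dispatcher indexed by esz.
 */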
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)

static gen_helper_gvec_flags_4 * const match_fns[4] = {
    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])

static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])

static gen_helper_gvec_4 * const histcnt_fns[4] = {
    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
                        histcnt_fns[a->esz], a, 0)

TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)

DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
/*
 * SVE Integer Multiply-Add (unpredicated)
 */

TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
                        0, FPST_FPCR)

static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
};
TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlal_zzzw_fns[a->esz], a, 2)

static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
    NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
};
TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 3)
TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqdmlsl_zzzw_fns[a->esz], a, 2)
static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
};
TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlah_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
};
TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
           sqrdmlsh_fns[a->esz], a, 0)

static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlal_zzzw_h,
    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
};
TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlal_zzzw_h,
    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
};
TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlal_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_smlsl_zzzw_h,
    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
};
TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           smlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
    NULL,                         gen_helper_sve2_umlsl_zzzw_h,
    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
};
TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 0)
TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
           umlsl_zzzw_fns[a->esz], a, 1)

static gen_helper_gvec_4 * const cmla_fns[] = {
    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
};
TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const cdot_fns[] = {
    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
};
TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
};
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)

TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
                        gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)

TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, false)
TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_aese, a, true)

TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4e, a, 0)
TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
                        gen_helper_crypto_sm4ekey, a, 0)

TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
                        gen_gvec_rax1, a)
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)

TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)

TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve_fcvt_ds)
TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
           float_round_to_odd, gen_helper_sve2_fcvtnt_ds)

static gen_helper_gvec_3_ptr * const flogb_fns[] = {
    NULL,               gen_helper_flogb_h,
    gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)

static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
                             a->rd, a->rn, a->rm, a->ra,
                             (a->index << 2) | (sel << 1) | sub, cpu_env);
}

TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)

TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_smmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_usmmla_b, a, 0)
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_ummla_b, a, 0)

TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
           gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_bfdot_idx, a)

TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
                        gen_helper_gvec_bfmmla, a, 0)

static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
                              a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)

static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
                              a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sel, FPST_FPCR);
}

TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
    int vl = vec_full_reg_size(s);
    int pl = pred_gvec_reg_size(s);
    int elements = vl >> a->esz;
    TCGv_i64 tmp, didx, dbit;
    TCGv_ptr ptr;

    if (!dc_isar_feature(aa64_sme, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    dbit = tcg_temp_new_i64();
    didx = tcg_temp_new_i64();
    ptr = tcg_temp_new_ptr();

    /* Compute the predicate element. */
    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
    if (is_power_of_2(elements)) {
        tcg_gen_andi_i64(tmp, tmp, elements - 1);
    } else {
        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
    }

    /* Extract the predicate byte and bit indices. */
    tcg_gen_shli_i64(tmp, tmp, a->esz);
    tcg_gen_andi_i64(dbit, tmp, 7);
    tcg_gen_shri_i64(didx, tmp, 3);
    if (HOST_BIG_ENDIAN) {
        tcg_gen_xori_i64(didx, didx, 7);
    }

    /* Load the predicate word. */
    tcg_gen_trunc_i64_ptr(ptr, didx);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);
    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));

    /* Extract the predicate bit and replicate to MO_64. */
    tcg_gen_shr_i64(tmp, tmp, dbit);
    tcg_gen_andi_i64(tmp, tmp, 1);
    tcg_gen_neg_i64(tmp, tmp);

    /* Apply to either copy the source, or write zeros. */
    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i64(dbit);
    tcg_temp_free_i64(didx);
    tcg_temp_free_ptr(ptr);
    return true;
}
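/*
 * Semantics sketch: PSEL pd, pn, pm.h[w13, #1] tests halfword
 * predicate element (w13 + 1) mod elements of pm; negating the
 * extracted bit yields all-ones or all-zeros, so the final
 * gvec_ands either copies pn into pd whole or clears pd entirely.
 */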
static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_smax_i32(d, a, n);
    tcg_gen_smin_i32(d, d, m);
}

static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_smax_i64(d, a, n);
    tcg_gen_smin_i64(d, d, m);
}

static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_smax_vec(vece, d, a, n);
    tcg_gen_smin_vec(vece, d, d, m);
}

static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_sclamp_i32,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_sclamp_i64,
          .fniv = gen_sclamp_vec,
          .fno  = gen_helper_gvec_sclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
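/*
 * I.e. SCLAMP zd, zn, zm computes zd = MIN(MAX(zd, zn), zm) per
 * element as signed values, clamping the accumulator into the range
 * [zn, zm]; UCLAMP below is the unsigned counterpart.
 */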
static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
    tcg_gen_umax_i32(d, a, n);
    tcg_gen_umin_i32(d, d, m);
}

static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
{
    tcg_gen_umax_i64(d, a, n);
    tcg_gen_umin_i64(d, d, m);
}

static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                           TCGv_vec m, TCGv_vec a)
{
    tcg_gen_umax_vec(vece, d, a, n);
    tcg_gen_umin_vec(vece, d, d, m);
}

static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                       uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop[] = {
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_uclamp_i32,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_uclamp_i64,
          .fniv = gen_uclamp_vec,
          .fno  = gen_helper_gvec_uclamp_d,
          .opt_opc = vecop,
          .vece = MO_64,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}

TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)