2 * ARM translation: M-profile MVE instructions
4 * Copyright (c) 2021 Linaro, Ltd.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "exec/gen-icount.h"
25 #include "translate.h"
26 #include "translate-a32.h"
28 static inline int vidup_imm(DisasContext
*s
, int x
)
33 /* Include the generated decoder */
34 #include "decode-mve.c.inc"
/*
 * Signatures for the generator callbacks used by the trans_* functions
 * below; each matches the argument list of the corresponding helper.
 */
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
54 /* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
55 static inline long mve_qreg_offset(unsigned reg
)
57 return offsetof(CPUARMState
, vfp
.zregs
[reg
].d
[0]);
60 static TCGv_ptr
mve_qreg_ptr(unsigned reg
)
62 TCGv_ptr ret
= tcg_temp_new_ptr();
63 tcg_gen_addi_ptr(ret
, cpu_env
, mve_qreg_offset(reg
));
67 static bool mve_no_predication(DisasContext
*s
)
70 * Return true if we are executing the entire MVE instruction
71 * with no predication or partial-execution, and so we can safely
72 * use an inline TCG vector implementation.
74 return s
->eci
== 0 && s
->mve_no_pred
;
77 static bool mve_check_qreg_bank(DisasContext
*s
, int qmask
)
80 * Check whether Qregs are in range. For v8.1M only Q0..Q7
81 * are supported, see VFPSmallRegisterBank().
86 bool mve_eci_check(DisasContext
*s
)
89 * This is a beatwise insn: check that ECI is valid (not a
90 * reserved value) and note that we are handling it.
91 * Return true if OK, false if we generated an exception.
93 s
->eci_handled
= true;
102 /* Reserved value: INVSTATE UsageFault */
103 gen_exception_insn(s
, s
->pc_curr
, EXCP_INVSTATE
, syn_uncategorized());
108 void mve_update_eci(DisasContext
*s
)
111 * The helper function will always update the CPUState field,
112 * so we only need to update the DisasContext field.
115 s
->eci
= (s
->eci
== ECI_A0A1A2B0
) ? ECI_A0
: ECI_NONE
;
119 void mve_update_and_store_eci(DisasContext
*s
)
122 * For insns which don't call a helper function that will call
123 * mve_advance_vpt(), this version updates s->eci and also stores
124 * it out to the CPUState field.
128 store_cpu_field(tcg_constant_i32(s
->eci
<< 4), condexec_bits
);
132 static bool mve_skip_first_beat(DisasContext
*s
)
134 /* Return true if PSR.ECI says we must skip the first beat of this insn */
144 g_assert_not_reached();
148 static bool do_ldst(DisasContext
*s
, arg_VLDR_VSTR
*a
, MVEGenLdStFn
*fn
,
155 if (!dc_isar_feature(aa32_mve
, s
) ||
156 !mve_check_qreg_bank(s
, a
->qd
) ||
161 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
162 if (a
->rn
== 15 || (a
->rn
== 13 && a
->w
)) {
166 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
170 offset
= a
->imm
<< msize
;
174 addr
= load_reg(s
, a
->rn
);
176 tcg_gen_addi_i32(addr
, addr
, offset
);
179 qreg
= mve_qreg_ptr(a
->qd
);
180 fn(cpu_env
, qreg
, addr
);
181 tcg_temp_free_ptr(qreg
);
184 * Writeback always happens after the last beat of the insn,
185 * regardless of predication
189 tcg_gen_addi_i32(addr
, addr
, offset
);
191 store_reg(s
, a
->rn
, addr
);
193 tcg_temp_free_i32(addr
);
199 static bool trans_VLDR_VSTR(DisasContext
*s
, arg_VLDR_VSTR
*a
)
201 static MVEGenLdStFn
* const ldstfns
[4][2] = {
202 { gen_helper_mve_vstrb
, gen_helper_mve_vldrb
},
203 { gen_helper_mve_vstrh
, gen_helper_mve_vldrh
},
204 { gen_helper_mve_vstrw
, gen_helper_mve_vldrw
},
207 return do_ldst(s
, a
, ldstfns
[a
->size
][a
->l
], a
->size
);
/* Widening loads / narrowing stores: only the load has a signed variant */
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }
220 DO_VLDST_WIDE_NARROW(VLDSTB_H
, vldrb_sh
, vldrb_uh
, vstrb_h
, MO_8
)
221 DO_VLDST_WIDE_NARROW(VLDSTB_W
, vldrb_sw
, vldrb_uw
, vstrb_w
, MO_8
)
222 DO_VLDST_WIDE_NARROW(VLDSTH_W
, vldrh_sw
, vldrh_uw
, vstrh_w
, MO_16
)
224 static bool do_ldst_sg(DisasContext
*s
, arg_vldst_sg
*a
, MVEGenLdStSGFn fn
)
229 if (!dc_isar_feature(aa32_mve
, s
) ||
230 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
231 !fn
|| a
->rn
== 15) {
232 /* Rn case is UNPREDICTABLE */
236 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
240 addr
= load_reg(s
, a
->rn
);
242 qd
= mve_qreg_ptr(a
->qd
);
243 qm
= mve_qreg_ptr(a
->qm
);
244 fn(cpu_env
, qd
, qm
, addr
);
245 tcg_temp_free_ptr(qd
);
246 tcg_temp_free_ptr(qm
);
247 tcg_temp_free_i32(addr
);
253 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
254 * signextended to halfword elements in register". _os_ indicates that
255 * the offsets in Qm should be scaled by the element size.
257 /* This macro is just to make the arrays more compact in these functions */
258 #define F(N) gen_helper_mve_##N
260 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
261 static bool trans_VLDR_S_sg(DisasContext
*s
, arg_vldst_sg
*a
)
263 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
264 { NULL
, F(vldrb_sg_sh
), F(vldrb_sg_sw
), NULL
},
265 { NULL
, NULL
, F(vldrh_sg_sw
), NULL
},
266 { NULL
, NULL
, NULL
, NULL
},
267 { NULL
, NULL
, NULL
, NULL
}
269 { NULL
, NULL
, NULL
, NULL
},
270 { NULL
, NULL
, F(vldrh_sg_os_sw
), NULL
},
271 { NULL
, NULL
, NULL
, NULL
},
272 { NULL
, NULL
, NULL
, NULL
}
275 if (a
->qd
== a
->qm
) {
276 return false; /* UNPREDICTABLE */
278 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
281 static bool trans_VLDR_U_sg(DisasContext
*s
, arg_vldst_sg
*a
)
283 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
284 { F(vldrb_sg_ub
), F(vldrb_sg_uh
), F(vldrb_sg_uw
), NULL
},
285 { NULL
, F(vldrh_sg_uh
), F(vldrh_sg_uw
), NULL
},
286 { NULL
, NULL
, F(vldrw_sg_uw
), NULL
},
287 { NULL
, NULL
, NULL
, F(vldrd_sg_ud
) }
289 { NULL
, NULL
, NULL
, NULL
},
290 { NULL
, F(vldrh_sg_os_uh
), F(vldrh_sg_os_uw
), NULL
},
291 { NULL
, NULL
, F(vldrw_sg_os_uw
), NULL
},
292 { NULL
, NULL
, NULL
, F(vldrd_sg_os_ud
) }
295 if (a
->qd
== a
->qm
) {
296 return false; /* UNPREDICTABLE */
298 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
301 static bool trans_VSTR_sg(DisasContext
*s
, arg_vldst_sg
*a
)
303 static MVEGenLdStSGFn
* const fns
[2][4][4] = { {
304 { F(vstrb_sg_ub
), F(vstrb_sg_uh
), F(vstrb_sg_uw
), NULL
},
305 { NULL
, F(vstrh_sg_uh
), F(vstrh_sg_uw
), NULL
},
306 { NULL
, NULL
, F(vstrw_sg_uw
), NULL
},
307 { NULL
, NULL
, NULL
, F(vstrd_sg_ud
) }
309 { NULL
, NULL
, NULL
, NULL
},
310 { NULL
, F(vstrh_sg_os_uh
), F(vstrh_sg_os_uw
), NULL
},
311 { NULL
, NULL
, F(vstrw_sg_os_uw
), NULL
},
312 { NULL
, NULL
, NULL
, F(vstrd_sg_os_ud
) }
315 return do_ldst_sg(s
, a
, fns
[a
->os
][a
->msize
][a
->size
]);
320 static bool do_ldst_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
,
321 MVEGenLdStSGFn
*fn
, unsigned msize
)
326 if (!dc_isar_feature(aa32_mve
, s
) ||
327 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
332 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
336 offset
= a
->imm
<< msize
;
341 qd
= mve_qreg_ptr(a
->qd
);
342 qm
= mve_qreg_ptr(a
->qm
);
343 fn(cpu_env
, qd
, qm
, tcg_constant_i32(offset
));
344 tcg_temp_free_ptr(qd
);
345 tcg_temp_free_ptr(qm
);
350 static bool trans_VLDRW_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
352 static MVEGenLdStSGFn
* const fns
[] = {
353 gen_helper_mve_vldrw_sg_uw
,
354 gen_helper_mve_vldrw_sg_wb_uw
,
356 if (a
->qd
== a
->qm
) {
357 return false; /* UNPREDICTABLE */
359 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_32
);
362 static bool trans_VLDRD_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
364 static MVEGenLdStSGFn
* const fns
[] = {
365 gen_helper_mve_vldrd_sg_ud
,
366 gen_helper_mve_vldrd_sg_wb_ud
,
368 if (a
->qd
== a
->qm
) {
369 return false; /* UNPREDICTABLE */
371 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_64
);
374 static bool trans_VSTRW_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
376 static MVEGenLdStSGFn
* const fns
[] = {
377 gen_helper_mve_vstrw_sg_uw
,
378 gen_helper_mve_vstrw_sg_wb_uw
,
380 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_32
);
383 static bool trans_VSTRD_sg_imm(DisasContext
*s
, arg_vldst_sg_imm
*a
)
385 static MVEGenLdStSGFn
* const fns
[] = {
386 gen_helper_mve_vstrd_sg_ud
,
387 gen_helper_mve_vstrd_sg_wb_ud
,
389 return do_ldst_sg_imm(s
, a
, fns
[a
->w
], MO_64
);
392 static bool do_vldst_il(DisasContext
*s
, arg_vldst_il
*a
, MVEGenLdStIlFn
*fn
,
397 if (!dc_isar_feature(aa32_mve
, s
) ||
398 !mve_check_qreg_bank(s
, a
->qd
) ||
399 !fn
|| (a
->rn
== 13 && a
->w
) || a
->rn
== 15) {
400 /* Variously UNPREDICTABLE or UNDEF or related-encoding */
403 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
407 rn
= load_reg(s
, a
->rn
);
409 * We pass the index of Qd, not a pointer, because the helper must
410 * access multiple Q registers starting at Qd and working up.
412 fn(cpu_env
, tcg_constant_i32(a
->qd
), rn
);
415 tcg_gen_addi_i32(rn
, rn
, addrinc
);
416 store_reg(s
, a
->rn
, rn
);
418 tcg_temp_free_i32(rn
);
420 mve_update_and_store_eci(s
);
424 /* This macro is just to make the arrays more compact in these functions */
425 #define F(N) gen_helper_mve_##N
427 static bool trans_VLD2(DisasContext
*s
, arg_vldst_il
*a
)
429 static MVEGenLdStIlFn
* const fns
[4][4] = {
430 { F(vld20b
), F(vld20h
), F(vld20w
), NULL
, },
431 { F(vld21b
), F(vld21h
), F(vld21w
), NULL
, },
432 { NULL
, NULL
, NULL
, NULL
},
433 { NULL
, NULL
, NULL
, NULL
},
438 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 32);
441 static bool trans_VLD4(DisasContext
*s
, arg_vldst_il
*a
)
443 static MVEGenLdStIlFn
* const fns
[4][4] = {
444 { F(vld40b
), F(vld40h
), F(vld40w
), NULL
, },
445 { F(vld41b
), F(vld41h
), F(vld41w
), NULL
, },
446 { F(vld42b
), F(vld42h
), F(vld42w
), NULL
, },
447 { F(vld43b
), F(vld43h
), F(vld43w
), NULL
, },
452 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 64);
455 static bool trans_VST2(DisasContext
*s
, arg_vldst_il
*a
)
457 static MVEGenLdStIlFn
* const fns
[4][4] = {
458 { F(vst20b
), F(vst20h
), F(vst20w
), NULL
, },
459 { F(vst21b
), F(vst21h
), F(vst21w
), NULL
, },
460 { NULL
, NULL
, NULL
, NULL
},
461 { NULL
, NULL
, NULL
, NULL
},
466 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 32);
469 static bool trans_VST4(DisasContext
*s
, arg_vldst_il
*a
)
471 static MVEGenLdStIlFn
* const fns
[4][4] = {
472 { F(vst40b
), F(vst40h
), F(vst40w
), NULL
, },
473 { F(vst41b
), F(vst41h
), F(vst41w
), NULL
, },
474 { F(vst42b
), F(vst42h
), F(vst42w
), NULL
, },
475 { F(vst43b
), F(vst43h
), F(vst43w
), NULL
, },
480 return do_vldst_il(s
, a
, fns
[a
->pat
][a
->size
], 64);
485 static bool trans_VDUP(DisasContext
*s
, arg_VDUP
*a
)
490 if (!dc_isar_feature(aa32_mve
, s
) ||
491 !mve_check_qreg_bank(s
, a
->qd
)) {
494 if (a
->rt
== 13 || a
->rt
== 15) {
495 /* UNPREDICTABLE; we choose to UNDEF */
498 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
502 rt
= load_reg(s
, a
->rt
);
503 if (mve_no_predication(s
)) {
504 tcg_gen_gvec_dup_i32(a
->size
, mve_qreg_offset(a
->qd
), 16, 16, rt
);
506 qd
= mve_qreg_ptr(a
->qd
);
507 tcg_gen_dup_i32(a
->size
, rt
, rt
);
508 gen_helper_mve_vdup(cpu_env
, qd
, rt
);
509 tcg_temp_free_ptr(qd
);
511 tcg_temp_free_i32(rt
);
516 static bool do_1op_vec(DisasContext
*s
, arg_1op
*a
, MVEGenOneOpFn fn
,
521 if (!dc_isar_feature(aa32_mve
, s
) ||
522 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
527 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
531 if (vecfn
&& mve_no_predication(s
)) {
532 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qm
), 16, 16);
534 qd
= mve_qreg_ptr(a
->qd
);
535 qm
= mve_qreg_ptr(a
->qm
);
537 tcg_temp_free_ptr(qd
);
538 tcg_temp_free_ptr(qm
);
544 static bool do_1op(DisasContext
*s
, arg_1op
*a
, MVEGenOneOpFn fn
)
546 return do_1op_vec(s
, a
, fn
, NULL
);
#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
565 DO_1OP_VEC(VABS
, vabs
, tcg_gen_gvec_abs
)
566 DO_1OP_VEC(VNEG
, vneg
, tcg_gen_gvec_neg
)
/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }
599 DO_VCVT(VCVT_SF
, vcvt_sh
, vcvt_sf
)
600 DO_VCVT(VCVT_UF
, vcvt_uh
, vcvt_uf
)
601 DO_VCVT(VCVT_FS
, vcvt_hs
, vcvt_fs
)
602 DO_VCVT(VCVT_FU
, vcvt_hu
, vcvt_fu
)
604 static bool do_vcvt_rmode(DisasContext
*s
, arg_1op
*a
,
605 enum arm_fprounding rmode
, bool u
)
608 * Handle VCVT fp to int with specified rounding mode.
609 * This is a 1op fn but we must pass the rounding mode as
610 * an immediate to the helper.
613 static MVEGenVCVTRmodeFn
* const fns
[4][2] = {
615 { gen_helper_mve_vcvt_rm_sh
, gen_helper_mve_vcvt_rm_uh
},
616 { gen_helper_mve_vcvt_rm_ss
, gen_helper_mve_vcvt_rm_us
},
619 MVEGenVCVTRmodeFn
*fn
= fns
[a
->size
][u
];
621 if (!dc_isar_feature(aa32_mve_fp
, s
) ||
622 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
627 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
631 qd
= mve_qreg_ptr(a
->qd
);
632 qm
= mve_qreg_ptr(a
->qm
);
633 fn(cpu_env
, qd
, qm
, tcg_constant_i32(arm_rmode_to_sf(rmode
)));
634 tcg_temp_free_ptr(qd
);
635 tcg_temp_free_ptr(qm
);
640 #define DO_VCVT_RMODE(INSN, RMODE, U) \
641 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
643 return do_vcvt_rmode(s, a, RMODE, U); \
646 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
647 DO_VCVT_RMODE(VCVTAU
, FPROUNDING_TIEAWAY
, true)
648 DO_VCVT_RMODE(VCVTNS
, FPROUNDING_TIEEVEN
, false)
649 DO_VCVT_RMODE(VCVTNU
, FPROUNDING_TIEEVEN
, true)
650 DO_VCVT_RMODE(VCVTPS
, FPROUNDING_POSINF
, false)
651 DO_VCVT_RMODE(VCVTPU
, FPROUNDING_POSINF
, true)
652 DO_VCVT_RMODE(VCVTMS
, FPROUNDING_NEGINF
, false)
653 DO_VCVT_RMODE(VCVTMU
, FPROUNDING_NEGINF
, true)
655 #define DO_VCVT_SH(INSN, FN) \
656 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
658 if (!dc_isar_feature(aa32_mve_fp, s)) { \
661 return do_1op(s, a, gen_helper_mve_##FN); \
664 DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
665 DO_VCVT_SH(VCVTT_SH
, vcvtt_sh
)
666 DO_VCVT_SH(VCVTB_HS
, vcvtb_hs
)
667 DO_VCVT_SH(VCVTT_HS
, vcvtt_hs
)
669 #define DO_VRINT(INSN, RMODE) \
670 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
672 gen_helper_mve_vrint_rm_h(env, qd, qm, \
673 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
675 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
677 gen_helper_mve_vrint_rm_s(env, qd, qm, \
678 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
680 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
682 static MVEGenOneOpFn * const fns[] = { \
688 if (!dc_isar_feature(aa32_mve_fp, s)) { \
691 return do_1op(s, a, fns[a->size]); \
694 DO_VRINT(VRINTN
, FPROUNDING_TIEEVEN
)
695 DO_VRINT(VRINTA
, FPROUNDING_TIEAWAY
)
696 DO_VRINT(VRINTZ
, FPROUNDING_ZERO
)
697 DO_VRINT(VRINTM
, FPROUNDING_NEGINF
)
698 DO_VRINT(VRINTP
, FPROUNDING_POSINF
)
700 static bool trans_VRINTX(DisasContext
*s
, arg_1op
*a
)
702 static MVEGenOneOpFn
* const fns
[] = {
704 gen_helper_mve_vrintx_h
,
705 gen_helper_mve_vrintx_s
,
708 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
711 return do_1op(s
, a
, fns
[a
->size
]);
714 /* Narrowing moves: only size 0 and 1 are valid */
715 #define DO_VMOVN(INSN, FN) \
716 static bool trans_##INSN(DisasContext *s, arg_1op *a) \
718 static MVEGenOneOpFn * const fns[] = { \
719 gen_helper_mve_##FN##b, \
720 gen_helper_mve_##FN##h, \
724 return do_1op(s, a, fns[a->size]); \
727 DO_VMOVN(VMOVNB
, vmovnb
)
728 DO_VMOVN(VMOVNT
, vmovnt
)
729 DO_VMOVN(VQMOVUNB
, vqmovunb
)
730 DO_VMOVN(VQMOVUNT
, vqmovunt
)
731 DO_VMOVN(VQMOVN_BS
, vqmovnbs
)
732 DO_VMOVN(VQMOVN_TS
, vqmovnts
)
733 DO_VMOVN(VQMOVN_BU
, vqmovnbu
)
734 DO_VMOVN(VQMOVN_TU
, vqmovntu
)
736 static bool trans_VREV16(DisasContext
*s
, arg_1op
*a
)
738 static MVEGenOneOpFn
* const fns
[] = {
739 gen_helper_mve_vrev16b
,
744 return do_1op(s
, a
, fns
[a
->size
]);
747 static bool trans_VREV32(DisasContext
*s
, arg_1op
*a
)
749 static MVEGenOneOpFn
* const fns
[] = {
750 gen_helper_mve_vrev32b
,
751 gen_helper_mve_vrev32h
,
755 return do_1op(s
, a
, fns
[a
->size
]);
758 static bool trans_VREV64(DisasContext
*s
, arg_1op
*a
)
760 static MVEGenOneOpFn
* const fns
[] = {
761 gen_helper_mve_vrev64b
,
762 gen_helper_mve_vrev64h
,
763 gen_helper_mve_vrev64w
,
766 return do_1op(s
, a
, fns
[a
->size
]);
769 static bool trans_VMVN(DisasContext
*s
, arg_1op
*a
)
771 return do_1op_vec(s
, a
, gen_helper_mve_vmvn
, tcg_gen_gvec_not
);
774 static bool trans_VABS_fp(DisasContext
*s
, arg_1op
*a
)
776 static MVEGenOneOpFn
* const fns
[] = {
778 gen_helper_mve_vfabsh
,
779 gen_helper_mve_vfabss
,
782 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
785 return do_1op(s
, a
, fns
[a
->size
]);
788 static bool trans_VNEG_fp(DisasContext
*s
, arg_1op
*a
)
790 static MVEGenOneOpFn
* const fns
[] = {
792 gen_helper_mve_vfnegh
,
793 gen_helper_mve_vfnegs
,
796 if (!dc_isar_feature(aa32_mve_fp
, s
)) {
799 return do_1op(s
, a
, fns
[a
->size
]);
802 static bool do_2op_vec(DisasContext
*s
, arg_2op
*a
, MVEGenTwoOpFn fn
,
807 if (!dc_isar_feature(aa32_mve
, s
) ||
808 !mve_check_qreg_bank(s
, a
->qd
| a
->qn
| a
->qm
) ||
812 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
816 if (vecfn
&& mve_no_predication(s
)) {
817 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qn
),
818 mve_qreg_offset(a
->qm
), 16, 16);
820 qd
= mve_qreg_ptr(a
->qd
);
821 qn
= mve_qreg_ptr(a
->qn
);
822 qm
= mve_qreg_ptr(a
->qm
);
823 fn(cpu_env
, qd
, qn
, qm
);
824 tcg_temp_free_ptr(qd
);
825 tcg_temp_free_ptr(qn
);
826 tcg_temp_free_ptr(qm
);
832 static bool do_2op(DisasContext
*s
, arg_2op
*a
, MVEGenTwoOpFn
*fn
)
834 return do_2op_vec(s
, a
, fn
, NULL
);
837 #define DO_LOGIC(INSN, HELPER, VECFN) \
838 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
840 return do_2op_vec(s, a, HELPER, VECFN); \
843 DO_LOGIC(VAND
, gen_helper_mve_vand
, tcg_gen_gvec_and
)
844 DO_LOGIC(VBIC
, gen_helper_mve_vbic
, tcg_gen_gvec_andc
)
845 DO_LOGIC(VORR
, gen_helper_mve_vorr
, tcg_gen_gvec_or
)
846 DO_LOGIC(VORN
, gen_helper_mve_vorn
, tcg_gen_gvec_orc
)
847 DO_LOGIC(VEOR
, gen_helper_mve_veor
, tcg_gen_gvec_xor
)
849 static bool trans_VPSEL(DisasContext
*s
, arg_2op
*a
)
851 /* This insn updates predication bits */
852 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
853 return do_2op(s
, a
, gen_helper_mve_vpsel
);
#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
870 DO_2OP_VEC(VADD
, vadd
, tcg_gen_gvec_add
)
871 DO_2OP_VEC(VSUB
, vsub
, tcg_gen_gvec_sub
)
872 DO_2OP_VEC(VMUL
, vmul
, tcg_gen_gvec_mul
)
873 DO_2OP(VMULH_S
, vmulhs
)
874 DO_2OP(VMULH_U
, vmulhu
)
875 DO_2OP(VRMULH_S
, vrmulhs
)
876 DO_2OP(VRMULH_U
, vrmulhu
)
877 DO_2OP_VEC(VMAX_S
, vmaxs
, tcg_gen_gvec_smax
)
878 DO_2OP_VEC(VMAX_U
, vmaxu
, tcg_gen_gvec_umax
)
879 DO_2OP_VEC(VMIN_S
, vmins
, tcg_gen_gvec_smin
)
880 DO_2OP_VEC(VMIN_U
, vminu
, tcg_gen_gvec_umin
)
881 DO_2OP(VABD_S
, vabds
)
882 DO_2OP(VABD_U
, vabdu
)
883 DO_2OP(VHADD_S
, vhadds
)
884 DO_2OP(VHADD_U
, vhaddu
)
885 DO_2OP(VHSUB_S
, vhsubs
)
886 DO_2OP(VHSUB_U
, vhsubu
)
887 DO_2OP(VMULL_BS
, vmullbs
)
888 DO_2OP(VMULL_BU
, vmullbu
)
889 DO_2OP(VMULL_TS
, vmullts
)
890 DO_2OP(VMULL_TU
, vmulltu
)
891 DO_2OP(VQDMULH
, vqdmulh
)
892 DO_2OP(VQRDMULH
, vqrdmulh
)
893 DO_2OP(VQADD_S
, vqadds
)
894 DO_2OP(VQADD_U
, vqaddu
)
895 DO_2OP(VQSUB_S
, vqsubs
)
896 DO_2OP(VQSUB_U
, vqsubu
)
897 DO_2OP(VSHL_S
, vshls
)
898 DO_2OP(VSHL_U
, vshlu
)
899 DO_2OP(VRSHL_S
, vrshls
)
900 DO_2OP(VRSHL_U
, vrshlu
)
901 DO_2OP(VQSHL_S
, vqshls
)
902 DO_2OP(VQSHL_U
, vqshlu
)
903 DO_2OP(VQRSHL_S
, vqrshls
)
904 DO_2OP(VQRSHL_U
, vqrshlu
)
905 DO_2OP(VQDMLADH
, vqdmladh
)
906 DO_2OP(VQDMLADHX
, vqdmladhx
)
907 DO_2OP(VQRDMLADH
, vqrdmladh
)
908 DO_2OP(VQRDMLADHX
, vqrdmladhx
)
909 DO_2OP(VQDMLSDH
, vqdmlsdh
)
910 DO_2OP(VQDMLSDHX
, vqdmlsdhx
)
911 DO_2OP(VQRDMLSDH
, vqrdmlsdh
)
912 DO_2OP(VQRDMLSDHX
, vqrdmlsdhx
)
913 DO_2OP(VRHADD_S
, vrhadds
)
914 DO_2OP(VRHADD_U
, vrhaddu
)
916 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
917 * so we can reuse the DO_2OP macro. (Our implementation calculates the
918 * "expected" results in this case.) Similarly for VHCADD.
920 DO_2OP(VCADD90
, vcadd90
)
921 DO_2OP(VCADD270
, vcadd270
)
922 DO_2OP(VHCADD90
, vhcadd90
)
923 DO_2OP(VHCADD270
, vhcadd270
)
925 static bool trans_VQDMULLB(DisasContext
*s
, arg_2op
*a
)
927 static MVEGenTwoOpFn
* const fns
[] = {
929 gen_helper_mve_vqdmullbh
,
930 gen_helper_mve_vqdmullbw
,
933 if (a
->size
== MO_32
&& (a
->qd
== a
->qm
|| a
->qd
== a
->qn
)) {
934 /* UNPREDICTABLE; we choose to undef */
937 return do_2op(s
, a
, fns
[a
->size
]);
940 static bool trans_VQDMULLT(DisasContext
*s
, arg_2op
*a
)
942 static MVEGenTwoOpFn
* const fns
[] = {
944 gen_helper_mve_vqdmullth
,
945 gen_helper_mve_vqdmulltw
,
948 if (a
->size
== MO_32
&& (a
->qd
== a
->qm
|| a
->qd
== a
->qn
)) {
949 /* UNPREDICTABLE; we choose to undef */
952 return do_2op(s
, a
, fns
[a
->size
]);
955 static bool trans_VMULLP_B(DisasContext
*s
, arg_2op
*a
)
958 * Note that a->size indicates the output size, ie VMULL.P8
959 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
960 * is the 16x16->32 operation and a->size is MO_32.
962 static MVEGenTwoOpFn
* const fns
[] = {
964 gen_helper_mve_vmullpbh
,
965 gen_helper_mve_vmullpbw
,
968 return do_2op(s
, a
, fns
[a
->size
]);
971 static bool trans_VMULLP_T(DisasContext
*s
, arg_2op
*a
)
973 /* a->size is as for trans_VMULLP_B */
974 static MVEGenTwoOpFn
* const fns
[] = {
976 gen_helper_mve_vmullpth
,
977 gen_helper_mve_vmullptw
,
980 return do_2op(s
, a
, fns
[a
->size
]);
984 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
985 * of the 32-bit elements in each lane of the input vectors, where the
986 * carry-out of each add is the carry-in of the next. The initial carry
987 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
988 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
989 * These insns are subject to beat-wise execution. Partial execution
990 * of an I=1 (initial carry input fixed) insn which does not
991 * execute the first beat must start with the current FPSCR.NZCV
992 * value, not the fixed constant input.
994 static bool trans_VADC(DisasContext
*s
, arg_2op
*a
)
996 return do_2op(s
, a
, gen_helper_mve_vadc
);
999 static bool trans_VADCI(DisasContext
*s
, arg_2op
*a
)
1001 if (mve_skip_first_beat(s
)) {
1002 return trans_VADC(s
, a
);
1004 return do_2op(s
, a
, gen_helper_mve_vadci
);
1007 static bool trans_VSBC(DisasContext
*s
, arg_2op
*a
)
1009 return do_2op(s
, a
, gen_helper_mve_vsbc
);
1012 static bool trans_VSBCI(DisasContext
*s
, arg_2op
*a
)
1014 if (mve_skip_first_beat(s
)) {
1015 return trans_VSBC(s
, a
);
1017 return do_2op(s
, a
, gen_helper_mve_vsbci
);
1020 #define DO_2OP_FP(INSN, FN) \
1021 static bool trans_##INSN(DisasContext *s, arg_2op *a) \
1023 static MVEGenTwoOpFn * const fns[] = { \
1025 gen_helper_mve_##FN##h, \
1026 gen_helper_mve_##FN##s, \
1029 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1032 return do_2op(s, a, fns[a->size]); \
1035 DO_2OP_FP(VADD_fp
, vfadd
)
1036 DO_2OP_FP(VSUB_fp
, vfsub
)
1037 DO_2OP_FP(VMUL_fp
, vfmul
)
1038 DO_2OP_FP(VABD_fp
, vfabd
)
1039 DO_2OP_FP(VMAXNM
, vmaxnm
)
1040 DO_2OP_FP(VMINNM
, vminnm
)
1041 DO_2OP_FP(VCADD90_fp
, vfcadd90
)
1042 DO_2OP_FP(VCADD270_fp
, vfcadd270
)
1043 DO_2OP_FP(VFMA
, vfma
)
1044 DO_2OP_FP(VFMS
, vfms
)
1045 DO_2OP_FP(VCMUL0
, vcmul0
)
1046 DO_2OP_FP(VCMUL90
, vcmul90
)
1047 DO_2OP_FP(VCMUL180
, vcmul180
)
1048 DO_2OP_FP(VCMUL270
, vcmul270
)
1049 DO_2OP_FP(VCMLA0
, vcmla0
)
1050 DO_2OP_FP(VCMLA90
, vcmla90
)
1051 DO_2OP_FP(VCMLA180
, vcmla180
)
1052 DO_2OP_FP(VCMLA270
, vcmla270
)
1053 DO_2OP_FP(VMAXNMA
, vmaxnma
)
1054 DO_2OP_FP(VMINNMA
, vminnma
)
1056 static bool do_2op_scalar(DisasContext
*s
, arg_2scalar
*a
,
1057 MVEGenTwoOpScalarFn fn
)
1062 if (!dc_isar_feature(aa32_mve
, s
) ||
1063 !mve_check_qreg_bank(s
, a
->qd
| a
->qn
) ||
1067 if (a
->rm
== 13 || a
->rm
== 15) {
1071 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1075 qd
= mve_qreg_ptr(a
->qd
);
1076 qn
= mve_qreg_ptr(a
->qn
);
1077 rm
= load_reg(s
, a
->rm
);
1078 fn(cpu_env
, qd
, qn
, rm
);
1079 tcg_temp_free_i32(rm
);
1080 tcg_temp_free_ptr(qd
);
1081 tcg_temp_free_ptr(qn
);
1086 #define DO_2OP_SCALAR(INSN, FN) \
1087 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1089 static MVEGenTwoOpScalarFn * const fns[] = { \
1090 gen_helper_mve_##FN##b, \
1091 gen_helper_mve_##FN##h, \
1092 gen_helper_mve_##FN##w, \
1095 return do_2op_scalar(s, a, fns[a->size]); \
1098 DO_2OP_SCALAR(VADD_scalar
, vadd_scalar
)
1099 DO_2OP_SCALAR(VSUB_scalar
, vsub_scalar
)
1100 DO_2OP_SCALAR(VMUL_scalar
, vmul_scalar
)
1101 DO_2OP_SCALAR(VHADD_S_scalar
, vhadds_scalar
)
1102 DO_2OP_SCALAR(VHADD_U_scalar
, vhaddu_scalar
)
1103 DO_2OP_SCALAR(VHSUB_S_scalar
, vhsubs_scalar
)
1104 DO_2OP_SCALAR(VHSUB_U_scalar
, vhsubu_scalar
)
1105 DO_2OP_SCALAR(VQADD_S_scalar
, vqadds_scalar
)
1106 DO_2OP_SCALAR(VQADD_U_scalar
, vqaddu_scalar
)
1107 DO_2OP_SCALAR(VQSUB_S_scalar
, vqsubs_scalar
)
1108 DO_2OP_SCALAR(VQSUB_U_scalar
, vqsubu_scalar
)
1109 DO_2OP_SCALAR(VQDMULH_scalar
, vqdmulh_scalar
)
1110 DO_2OP_SCALAR(VQRDMULH_scalar
, vqrdmulh_scalar
)
1111 DO_2OP_SCALAR(VBRSR
, vbrsr
)
1112 DO_2OP_SCALAR(VMLA
, vmla
)
1113 DO_2OP_SCALAR(VMLAS
, vmlas
)
1114 DO_2OP_SCALAR(VQDMLAH
, vqdmlah
)
1115 DO_2OP_SCALAR(VQRDMLAH
, vqrdmlah
)
1116 DO_2OP_SCALAR(VQDMLASH
, vqdmlash
)
1117 DO_2OP_SCALAR(VQRDMLASH
, vqrdmlash
)
1119 static bool trans_VQDMULLB_scalar(DisasContext
*s
, arg_2scalar
*a
)
1121 static MVEGenTwoOpScalarFn
* const fns
[] = {
1123 gen_helper_mve_vqdmullb_scalarh
,
1124 gen_helper_mve_vqdmullb_scalarw
,
1127 if (a
->qd
== a
->qn
&& a
->size
== MO_32
) {
1128 /* UNPREDICTABLE; we choose to undef */
1131 return do_2op_scalar(s
, a
, fns
[a
->size
]);
1134 static bool trans_VQDMULLT_scalar(DisasContext
*s
, arg_2scalar
*a
)
1136 static MVEGenTwoOpScalarFn
* const fns
[] = {
1138 gen_helper_mve_vqdmullt_scalarh
,
1139 gen_helper_mve_vqdmullt_scalarw
,
1142 if (a
->qd
== a
->qn
&& a
->size
== MO_32
) {
1143 /* UNPREDICTABLE; we choose to undef */
1146 return do_2op_scalar(s
, a
, fns
[a
->size
]);
1150 #define DO_2OP_FP_SCALAR(INSN, FN) \
1151 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
1153 static MVEGenTwoOpScalarFn * const fns[] = { \
1155 gen_helper_mve_##FN##h, \
1156 gen_helper_mve_##FN##s, \
1159 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1162 return do_2op_scalar(s, a, fns[a->size]); \
1165 DO_2OP_FP_SCALAR(VADD_fp_scalar
, vfadd_scalar
)
1166 DO_2OP_FP_SCALAR(VSUB_fp_scalar
, vfsub_scalar
)
1167 DO_2OP_FP_SCALAR(VMUL_fp_scalar
, vfmul_scalar
)
1168 DO_2OP_FP_SCALAR(VFMA_scalar
, vfma_scalar
)
1169 DO_2OP_FP_SCALAR(VFMAS_scalar
, vfmas_scalar
)
1171 static bool do_long_dual_acc(DisasContext
*s
, arg_vmlaldav
*a
,
1172 MVEGenLongDualAccOpFn
*fn
)
1176 TCGv_i32 rdalo
, rdahi
;
1178 if (!dc_isar_feature(aa32_mve
, s
) ||
1179 !mve_check_qreg_bank(s
, a
->qn
| a
->qm
) ||
1184 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1185 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1187 if (a
->rdahi
== 13 || a
->rdahi
== 15) {
1190 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1194 qn
= mve_qreg_ptr(a
->qn
);
1195 qm
= mve_qreg_ptr(a
->qm
);
1198 * This insn is subject to beat-wise execution. Partial execution
1199 * of an A=0 (no-accumulate) insn which does not execute the first
1200 * beat must start with the current rda value, not 0.
1202 if (a
->a
|| mve_skip_first_beat(s
)) {
1203 rda
= tcg_temp_new_i64();
1204 rdalo
= load_reg(s
, a
->rdalo
);
1205 rdahi
= load_reg(s
, a
->rdahi
);
1206 tcg_gen_concat_i32_i64(rda
, rdalo
, rdahi
);
1207 tcg_temp_free_i32(rdalo
);
1208 tcg_temp_free_i32(rdahi
);
1210 rda
= tcg_const_i64(0);
1213 fn(rda
, cpu_env
, qn
, qm
, rda
);
1214 tcg_temp_free_ptr(qn
);
1215 tcg_temp_free_ptr(qm
);
1217 rdalo
= tcg_temp_new_i32();
1218 rdahi
= tcg_temp_new_i32();
1219 tcg_gen_extrl_i64_i32(rdalo
, rda
);
1220 tcg_gen_extrh_i64_i32(rdahi
, rda
);
1221 store_reg(s
, a
->rdalo
, rdalo
);
1222 store_reg(s
, a
->rdahi
, rdahi
);
1223 tcg_temp_free_i64(rda
);
1228 static bool trans_VMLALDAV_S(DisasContext
*s
, arg_vmlaldav
*a
)
1230 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1232 { gen_helper_mve_vmlaldavsh
, gen_helper_mve_vmlaldavxsh
},
1233 { gen_helper_mve_vmlaldavsw
, gen_helper_mve_vmlaldavxsw
},
1236 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1239 static bool trans_VMLALDAV_U(DisasContext
*s
, arg_vmlaldav
*a
)
1241 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1243 { gen_helper_mve_vmlaldavuh
, NULL
},
1244 { gen_helper_mve_vmlaldavuw
, NULL
},
1247 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1250 static bool trans_VMLSLDAV(DisasContext
*s
, arg_vmlaldav
*a
)
1252 static MVEGenLongDualAccOpFn
* const fns
[4][2] = {
1254 { gen_helper_mve_vmlsldavsh
, gen_helper_mve_vmlsldavxsh
},
1255 { gen_helper_mve_vmlsldavsw
, gen_helper_mve_vmlsldavxsw
},
1258 return do_long_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1261 static bool trans_VRMLALDAVH_S(DisasContext
*s
, arg_vmlaldav
*a
)
1263 static MVEGenLongDualAccOpFn
* const fns
[] = {
1264 gen_helper_mve_vrmlaldavhsw
, gen_helper_mve_vrmlaldavhxsw
,
1266 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
1269 static bool trans_VRMLALDAVH_U(DisasContext
*s
, arg_vmlaldav
*a
)
1271 static MVEGenLongDualAccOpFn
* const fns
[] = {
1272 gen_helper_mve_vrmlaldavhuw
, NULL
,
1274 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
1277 static bool trans_VRMLSLDAVH(DisasContext
*s
, arg_vmlaldav
*a
)
1279 static MVEGenLongDualAccOpFn
* const fns
[] = {
1280 gen_helper_mve_vrmlsldavhsw
, gen_helper_mve_vrmlsldavhxsw
,
1282 return do_long_dual_acc(s
, a
, fns
[a
->x
]);
1285 static bool do_dual_acc(DisasContext
*s
, arg_vmladav
*a
, MVEGenDualAccOpFn
*fn
)
1290 if (!dc_isar_feature(aa32_mve
, s
) ||
1291 !mve_check_qreg_bank(s
, a
->qn
) ||
1295 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1299 qn
= mve_qreg_ptr(a
->qn
);
1300 qm
= mve_qreg_ptr(a
->qm
);
1303 * This insn is subject to beat-wise execution. Partial execution
1304 * of an A=0 (no-accumulate) insn which does not execute the first
1305 * beat must start with the current rda value, not 0.
1307 if (a
->a
|| mve_skip_first_beat(s
)) {
1308 rda
= load_reg(s
, a
->rda
);
1310 rda
= tcg_const_i32(0);
1313 fn(rda
, cpu_env
, qn
, qm
, rda
);
1314 store_reg(s
, a
->rda
, rda
);
1315 tcg_temp_free_ptr(qn
);
1316 tcg_temp_free_ptr(qm
);
1322 #define DO_DUAL_ACC(INSN, FN) \
1323 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \
1325 static MVEGenDualAccOpFn * const fns[4][2] = { \
1326 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
1327 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
1328 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
1331 return do_dual_acc(s, a, fns[a->size][a->x]); \
1334 DO_DUAL_ACC(VMLADAV_S
, vmladavs
)
1335 DO_DUAL_ACC(VMLSDAV
, vmlsdav
)
1337 static bool trans_VMLADAV_U(DisasContext
*s
, arg_vmladav
*a
)
1339 static MVEGenDualAccOpFn
* const fns
[4][2] = {
1340 { gen_helper_mve_vmladavub
, NULL
},
1341 { gen_helper_mve_vmladavuh
, NULL
},
1342 { gen_helper_mve_vmladavuw
, NULL
},
1345 return do_dual_acc(s
, a
, fns
[a
->size
][a
->x
]);
1348 static void gen_vpst(DisasContext
*s
, uint32_t mask
)
1351 * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1352 * being adjacent fields in the register.
1354 * Updating the masks is not predicated, but it is subject to beat-wise
1355 * execution, and the mask is updated on the odd-numbered beats.
1356 * So if PSR.ECI says we should skip beat 1, we mustn't update the
1359 TCGv_i32 vpr
= load_cpu_field(v7m
.vpr
);
1363 /* Update both 01 and 23 fields */
1364 tcg_gen_deposit_i32(vpr
, vpr
,
1365 tcg_constant_i32(mask
| (mask
<< 4)),
1366 R_V7M_VPR_MASK01_SHIFT
,
1367 R_V7M_VPR_MASK01_LENGTH
+ R_V7M_VPR_MASK23_LENGTH
);
1372 /* Update only the 23 mask field */
1373 tcg_gen_deposit_i32(vpr
, vpr
,
1374 tcg_constant_i32(mask
),
1375 R_V7M_VPR_MASK23_SHIFT
, R_V7M_VPR_MASK23_LENGTH
);
1378 g_assert_not_reached();
1380 store_cpu_field(vpr
, v7m
.vpr
);
1383 static bool trans_VPST(DisasContext
*s
, arg_VPST
*a
)
1385 /* mask == 0 is a "related encoding" */
1386 if (!dc_isar_feature(aa32_mve
, s
) || !a
->mask
) {
1389 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1392 gen_vpst(s
, a
->mask
);
1393 mve_update_and_store_eci(s
);
1397 static bool trans_VPNOT(DisasContext
*s
, arg_VPNOT
*a
)
1400 * Invert the predicate in VPR.P0. We have call out to
1401 * a helper because this insn itself is beatwise and can
1404 if (!dc_isar_feature(aa32_mve
, s
)) {
1407 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1411 gen_helper_mve_vpnot(cpu_env
);
1412 /* This insn updates predication bits */
1413 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
1418 static bool trans_VADDV(DisasContext
*s
, arg_VADDV
*a
)
1420 /* VADDV: vector add across vector */
1421 static MVEGenVADDVFn
* const fns
[4][2] = {
1422 { gen_helper_mve_vaddvsb
, gen_helper_mve_vaddvub
},
1423 { gen_helper_mve_vaddvsh
, gen_helper_mve_vaddvuh
},
1424 { gen_helper_mve_vaddvsw
, gen_helper_mve_vaddvuw
},
1430 if (!dc_isar_feature(aa32_mve
, s
) ||
1434 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1439 * This insn is subject to beat-wise execution. Partial execution
1440 * of an A=0 (no-accumulate) insn which does not execute the first
1441 * beat must start with the current value of Rda, not zero.
1443 if (a
->a
|| mve_skip_first_beat(s
)) {
1444 /* Accumulate input from Rda */
1445 rda
= load_reg(s
, a
->rda
);
1447 /* Accumulate starting at zero */
1448 rda
= tcg_const_i32(0);
1451 qm
= mve_qreg_ptr(a
->qm
);
1452 fns
[a
->size
][a
->u
](rda
, cpu_env
, qm
, rda
);
1453 store_reg(s
, a
->rda
, rda
);
1454 tcg_temp_free_ptr(qm
);
1460 static bool trans_VADDLV(DisasContext
*s
, arg_VADDLV
*a
)
1463 * Vector Add Long Across Vector: accumulate the 32-bit
1464 * elements of the vector into a 64-bit result stored in
1465 * a pair of general-purpose registers.
1466 * No need to check Qm's bank: it is only 3 bits in decode.
1470 TCGv_i32 rdalo
, rdahi
;
1472 if (!dc_isar_feature(aa32_mve
, s
)) {
1476 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1477 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1479 if (a
->rdahi
== 13 || a
->rdahi
== 15) {
1482 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1487 * This insn is subject to beat-wise execution. Partial execution
1488 * of an A=0 (no-accumulate) insn which does not execute the first
1489 * beat must start with the current value of RdaHi:RdaLo, not zero.
1491 if (a
->a
|| mve_skip_first_beat(s
)) {
1492 /* Accumulate input from RdaHi:RdaLo */
1493 rda
= tcg_temp_new_i64();
1494 rdalo
= load_reg(s
, a
->rdalo
);
1495 rdahi
= load_reg(s
, a
->rdahi
);
1496 tcg_gen_concat_i32_i64(rda
, rdalo
, rdahi
);
1497 tcg_temp_free_i32(rdalo
);
1498 tcg_temp_free_i32(rdahi
);
1500 /* Accumulate starting at zero */
1501 rda
= tcg_const_i64(0);
1504 qm
= mve_qreg_ptr(a
->qm
);
1506 gen_helper_mve_vaddlv_u(rda
, cpu_env
, qm
, rda
);
1508 gen_helper_mve_vaddlv_s(rda
, cpu_env
, qm
, rda
);
1510 tcg_temp_free_ptr(qm
);
1512 rdalo
= tcg_temp_new_i32();
1513 rdahi
= tcg_temp_new_i32();
1514 tcg_gen_extrl_i64_i32(rdalo
, rda
);
1515 tcg_gen_extrh_i64_i32(rdahi
, rda
);
1516 store_reg(s
, a
->rdalo
, rdalo
);
1517 store_reg(s
, a
->rdahi
, rdahi
);
1518 tcg_temp_free_i64(rda
);
1523 static bool do_1imm(DisasContext
*s
, arg_1imm
*a
, MVEGenOneOpImmFn
*fn
,
1529 if (!dc_isar_feature(aa32_mve
, s
) ||
1530 !mve_check_qreg_bank(s
, a
->qd
) ||
1534 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1538 imm
= asimd_imm_const(a
->imm
, a
->cmode
, a
->op
);
1540 if (vecfn
&& mve_no_predication(s
)) {
1541 vecfn(MO_64
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qd
),
1544 qd
= mve_qreg_ptr(a
->qd
);
1545 fn(cpu_env
, qd
, tcg_constant_i64(imm
));
1546 tcg_temp_free_ptr(qd
);
1552 static void gen_gvec_vmovi(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1553 int64_t c
, uint32_t oprsz
, uint32_t maxsz
)
1555 tcg_gen_gvec_dup_imm(vece
, dofs
, oprsz
, maxsz
, c
);
1558 static bool trans_Vimm_1r(DisasContext
*s
, arg_1imm
*a
)
1560 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1561 MVEGenOneOpImmFn
*fn
;
1564 if ((a
->cmode
& 1) && a
->cmode
< 12) {
1567 * For op=1, the immediate will be inverted by asimd_imm_const(),
1568 * so the VBIC becomes a logical AND operation.
1570 fn
= gen_helper_mve_vandi
;
1571 vecfn
= tcg_gen_gvec_andi
;
1573 fn
= gen_helper_mve_vorri
;
1574 vecfn
= tcg_gen_gvec_ori
;
1577 /* There is one unallocated cmode/op combination in this space */
1578 if (a
->cmode
== 15 && a
->op
== 1) {
1581 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1582 fn
= gen_helper_mve_vmovi
;
1583 vecfn
= gen_gvec_vmovi
;
1585 return do_1imm(s
, a
, fn
, vecfn
);
1588 static bool do_2shift_vec(DisasContext
*s
, arg_2shift
*a
, MVEGenTwoOpShiftFn fn
,
1589 bool negateshift
, GVecGen2iFn vecfn
)
1592 int shift
= a
->shift
;
1594 if (!dc_isar_feature(aa32_mve
, s
) ||
1595 !mve_check_qreg_bank(s
, a
->qd
| a
->qm
) ||
1599 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1604 * When we handle a right shift insn using a left-shift helper
1605 * which permits a negative shift count to indicate a right-shift,
1606 * we must negate the shift count.
1612 if (vecfn
&& mve_no_predication(s
)) {
1613 vecfn(a
->size
, mve_qreg_offset(a
->qd
), mve_qreg_offset(a
->qm
),
1616 qd
= mve_qreg_ptr(a
->qd
);
1617 qm
= mve_qreg_ptr(a
->qm
);
1618 fn(cpu_env
, qd
, qm
, tcg_constant_i32(shift
));
1619 tcg_temp_free_ptr(qd
);
1620 tcg_temp_free_ptr(qm
);
1626 static bool do_2shift(DisasContext
*s
, arg_2shift
*a
, MVEGenTwoOpShiftFn fn
,
1629 return do_2shift_vec(s
, a
, fn
, negateshift
, NULL
);
#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
1647 static void do_gvec_shri_s(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1648 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1651 * We get here with a negated shift count, and we must handle
1652 * shifts by the element size, which tcg_gen_gvec_sari() does not do.
1655 if (shift
== (8 << vece
)) {
1658 tcg_gen_gvec_sari(vece
, dofs
, aofs
, shift
, oprsz
, maxsz
);
1661 static void do_gvec_shri_u(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1662 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1665 * We get here with a negated shift count, and we must handle
1666 * shifts by the element size, which tcg_gen_gvec_shri() does not do.
1669 if (shift
== (8 << vece
)) {
1670 tcg_gen_gvec_dup_imm(vece
, dofs
, oprsz
, maxsz
, 0);
1672 tcg_gen_gvec_shri(vece
, dofs
, aofs
, shift
, oprsz
, maxsz
);
1676 DO_2SHIFT_VEC(VSHLI
, vshli_u
, false, tcg_gen_gvec_shli
)
1677 DO_2SHIFT(VQSHLI_S
, vqshli_s
, false)
1678 DO_2SHIFT(VQSHLI_U
, vqshli_u
, false)
1679 DO_2SHIFT(VQSHLUI
, vqshlui_s
, false)
1680 /* These right shifts use a left-shift helper with negated shift count */
1681 DO_2SHIFT_VEC(VSHRI_S
, vshli_s
, true, do_gvec_shri_s
)
1682 DO_2SHIFT_VEC(VSHRI_U
, vshli_u
, true, do_gvec_shri_u
)
1683 DO_2SHIFT(VRSHRI_S
, vrshli_s
, true)
1684 DO_2SHIFT(VRSHRI_U
, vrshli_u
, true)
1686 DO_2SHIFT_VEC(VSRI
, vsri
, false, gen_gvec_sri
)
1687 DO_2SHIFT_VEC(VSLI
, vsli
, false, gen_gvec_sli
)
1689 #define DO_2SHIFT_FP(INSN, FN) \
1690 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1692 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1695 return do_2shift(s, a, gen_helper_mve_##FN, false); \
1698 DO_2SHIFT_FP(VCVT_SH_fixed
, vcvt_sh
)
1699 DO_2SHIFT_FP(VCVT_UH_fixed
, vcvt_uh
)
1700 DO_2SHIFT_FP(VCVT_HS_fixed
, vcvt_hs
)
1701 DO_2SHIFT_FP(VCVT_HU_fixed
, vcvt_hu
)
1702 DO_2SHIFT_FP(VCVT_SF_fixed
, vcvt_sf
)
1703 DO_2SHIFT_FP(VCVT_UF_fixed
, vcvt_uf
)
1704 DO_2SHIFT_FP(VCVT_FS_fixed
, vcvt_fs
)
1705 DO_2SHIFT_FP(VCVT_FU_fixed
, vcvt_fu
)
1707 static bool do_2shift_scalar(DisasContext
*s
, arg_shl_scalar
*a
,
1708 MVEGenTwoOpShiftFn
*fn
)
1713 if (!dc_isar_feature(aa32_mve
, s
) ||
1714 !mve_check_qreg_bank(s
, a
->qda
) ||
1715 a
->rm
== 13 || a
->rm
== 15 || !fn
) {
1716 /* Rm cases are UNPREDICTABLE */
1719 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1723 qda
= mve_qreg_ptr(a
->qda
);
1724 rm
= load_reg(s
, a
->rm
);
1725 fn(cpu_env
, qda
, qda
, rm
);
1726 tcg_temp_free_ptr(qda
);
1727 tcg_temp_free_i32(rm
);
1732 #define DO_2SHIFT_SCALAR(INSN, FN) \
1733 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
1735 static MVEGenTwoOpShiftFn * const fns[] = { \
1736 gen_helper_mve_##FN##b, \
1737 gen_helper_mve_##FN##h, \
1738 gen_helper_mve_##FN##w, \
1741 return do_2shift_scalar(s, a, fns[a->size]); \
1744 DO_2SHIFT_SCALAR(VSHL_S_scalar
, vshli_s
)
1745 DO_2SHIFT_SCALAR(VSHL_U_scalar
, vshli_u
)
1746 DO_2SHIFT_SCALAR(VRSHL_S_scalar
, vrshli_s
)
1747 DO_2SHIFT_SCALAR(VRSHL_U_scalar
, vrshli_u
)
1748 DO_2SHIFT_SCALAR(VQSHL_S_scalar
, vqshli_s
)
1749 DO_2SHIFT_SCALAR(VQSHL_U_scalar
, vqshli_u
)
1750 DO_2SHIFT_SCALAR(VQRSHL_S_scalar
, vqrshli_s
)
1751 DO_2SHIFT_SCALAR(VQRSHL_U_scalar
, vqrshli_u
)
/* Widening shifts: only byte and halfword input element sizes exist */
#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
    }
1768 static void do_gvec_vshllbs(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1769 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1771 unsigned ovece
= vece
+ 1;
1772 unsigned ibits
= vece
== MO_8
? 8 : 16;
1773 tcg_gen_gvec_shli(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1774 tcg_gen_gvec_sari(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1777 static void do_gvec_vshllbu(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1778 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1780 unsigned ovece
= vece
+ 1;
1781 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1782 ovece
== MO_16
? 0xff : 0xffff, oprsz
, maxsz
);
1783 tcg_gen_gvec_shli(ovece
, dofs
, dofs
, shift
, oprsz
, maxsz
);
1786 static void do_gvec_vshllts(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1787 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1789 unsigned ovece
= vece
+ 1;
1790 unsigned ibits
= vece
== MO_8
? 8 : 16;
1792 tcg_gen_gvec_sari(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1794 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1795 ovece
== MO_16
? 0xff00 : 0xffff0000, oprsz
, maxsz
);
1796 tcg_gen_gvec_sari(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1800 static void do_gvec_vshlltu(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1801 int64_t shift
, uint32_t oprsz
, uint32_t maxsz
)
1803 unsigned ovece
= vece
+ 1;
1804 unsigned ibits
= vece
== MO_8
? 8 : 16;
1806 tcg_gen_gvec_shri(ovece
, dofs
, aofs
, ibits
, oprsz
, maxsz
);
1808 tcg_gen_gvec_andi(ovece
, dofs
, aofs
,
1809 ovece
== MO_16
? 0xff00 : 0xffff0000, oprsz
, maxsz
);
1810 tcg_gen_gvec_shri(ovece
, dofs
, dofs
, ibits
- shift
, oprsz
, maxsz
);
1814 DO_VSHLL(VSHLL_BS
, vshllbs
)
1815 DO_VSHLL(VSHLL_BU
, vshllbu
)
1816 DO_VSHLL(VSHLL_TS
, vshllts
)
1817 DO_VSHLL(VSHLL_TU
, vshlltu
)
1819 #define DO_2SHIFT_N(INSN, FN) \
1820 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1822 static MVEGenTwoOpShiftFn * const fns[] = { \
1823 gen_helper_mve_##FN##b, \
1824 gen_helper_mve_##FN##h, \
1826 return do_2shift(s, a, fns[a->size], false); \
1829 DO_2SHIFT_N(VSHRNB
, vshrnb
)
1830 DO_2SHIFT_N(VSHRNT
, vshrnt
)
1831 DO_2SHIFT_N(VRSHRNB
, vrshrnb
)
1832 DO_2SHIFT_N(VRSHRNT
, vrshrnt
)
1833 DO_2SHIFT_N(VQSHRNB_S
, vqshrnb_s
)
1834 DO_2SHIFT_N(VQSHRNT_S
, vqshrnt_s
)
1835 DO_2SHIFT_N(VQSHRNB_U
, vqshrnb_u
)
1836 DO_2SHIFT_N(VQSHRNT_U
, vqshrnt_u
)
1837 DO_2SHIFT_N(VQSHRUNB
, vqshrunb
)
1838 DO_2SHIFT_N(VQSHRUNT
, vqshrunt
)
1839 DO_2SHIFT_N(VQRSHRNB_S
, vqrshrnb_s
)
1840 DO_2SHIFT_N(VQRSHRNT_S
, vqrshrnt_s
)
1841 DO_2SHIFT_N(VQRSHRNB_U
, vqrshrnb_u
)
1842 DO_2SHIFT_N(VQRSHRNT_U
, vqrshrnt_u
)
1843 DO_2SHIFT_N(VQRSHRUNB
, vqrshrunb
)
1844 DO_2SHIFT_N(VQRSHRUNT
, vqrshrunt
)
1846 static bool trans_VSHLC(DisasContext
*s
, arg_VSHLC
*a
)
1849 * Whole Vector Left Shift with Carry. The carry is taken
1850 * from a general purpose register and written back there.
1851 * An imm of 0 means "shift by 32".
1856 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1859 if (a
->rdm
== 13 || a
->rdm
== 15) {
1860 /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1863 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1867 qd
= mve_qreg_ptr(a
->qd
);
1868 rdm
= load_reg(s
, a
->rdm
);
1869 gen_helper_mve_vshlc(rdm
, cpu_env
, qd
, rdm
, tcg_constant_i32(a
->imm
));
1870 store_reg(s
, a
->rdm
, rdm
);
1871 tcg_temp_free_ptr(qd
);
1876 static bool do_vidup(DisasContext
*s
, arg_vidup
*a
, MVEGenVIDUPFn
*fn
)
1882 * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1883 * This fills the vector with elements of successively increasing
1884 * or decreasing values, starting from Rn.
1886 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1889 if (a
->size
== MO_64
) {
1890 /* size 0b11 is another encoding */
1893 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1897 qd
= mve_qreg_ptr(a
->qd
);
1898 rn
= load_reg(s
, a
->rn
);
1899 fn(rn
, cpu_env
, qd
, rn
, tcg_constant_i32(a
->imm
));
1900 store_reg(s
, a
->rn
, rn
);
1901 tcg_temp_free_ptr(qd
);
1906 static bool do_viwdup(DisasContext
*s
, arg_viwdup
*a
, MVEGenVIWDUPFn
*fn
)
1912 * Vector increment/decrement with wrap and duplicate (VIWDUp, VDWDUP)
1913 * This fills the vector with elements of successively increasing
1914 * or decreasing values, starting from Rn. Rm specifies a point where
1915 * the count wraps back around to 0. The updated offset is written back
1918 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
)) {
1921 if (!fn
|| a
->rm
== 13 || a
->rm
== 15) {
1923 * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1924 * Rm == 13 is VIWDUP, VDWDUP.
1928 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
1932 qd
= mve_qreg_ptr(a
->qd
);
1933 rn
= load_reg(s
, a
->rn
);
1934 rm
= load_reg(s
, a
->rm
);
1935 fn(rn
, cpu_env
, qd
, rn
, rm
, tcg_constant_i32(a
->imm
));
1936 store_reg(s
, a
->rn
, rn
);
1937 tcg_temp_free_ptr(qd
);
1938 tcg_temp_free_i32(rm
);
1943 static bool trans_VIDUP(DisasContext
*s
, arg_vidup
*a
)
1945 static MVEGenVIDUPFn
* const fns
[] = {
1946 gen_helper_mve_vidupb
,
1947 gen_helper_mve_viduph
,
1948 gen_helper_mve_vidupw
,
1951 return do_vidup(s
, a
, fns
[a
->size
]);
1954 static bool trans_VDDUP(DisasContext
*s
, arg_vidup
*a
)
1956 static MVEGenVIDUPFn
* const fns
[] = {
1957 gen_helper_mve_vidupb
,
1958 gen_helper_mve_viduph
,
1959 gen_helper_mve_vidupw
,
1962 /* VDDUP is just like VIDUP but with a negative immediate */
1964 return do_vidup(s
, a
, fns
[a
->size
]);
1967 static bool trans_VIWDUP(DisasContext
*s
, arg_viwdup
*a
)
1969 static MVEGenVIWDUPFn
* const fns
[] = {
1970 gen_helper_mve_viwdupb
,
1971 gen_helper_mve_viwduph
,
1972 gen_helper_mve_viwdupw
,
1975 return do_viwdup(s
, a
, fns
[a
->size
]);
1978 static bool trans_VDWDUP(DisasContext
*s
, arg_viwdup
*a
)
1980 static MVEGenVIWDUPFn
* const fns
[] = {
1981 gen_helper_mve_vdwdupb
,
1982 gen_helper_mve_vdwduph
,
1983 gen_helper_mve_vdwdupw
,
1986 return do_viwdup(s
, a
, fns
[a
->size
]);
1989 static bool do_vcmp(DisasContext
*s
, arg_vcmp
*a
, MVEGenCmpFn
*fn
)
1993 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qm
) ||
1997 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2001 qn
= mve_qreg_ptr(a
->qn
);
2002 qm
= mve_qreg_ptr(a
->qm
);
2003 fn(cpu_env
, qn
, qm
);
2004 tcg_temp_free_ptr(qn
);
2005 tcg_temp_free_ptr(qm
);
2008 gen_vpst(s
, a
->mask
);
2010 /* This insn updates predication bits */
2011 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
2016 static bool do_vcmp_scalar(DisasContext
*s
, arg_vcmp_scalar
*a
,
2017 MVEGenScalarCmpFn
*fn
)
2022 if (!dc_isar_feature(aa32_mve
, s
) || !fn
|| a
->rm
== 13) {
2025 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2029 qn
= mve_qreg_ptr(a
->qn
);
2031 /* Encoding Rm=0b1111 means "constant zero" */
2032 rm
= tcg_constant_i32(0);
2034 rm
= load_reg(s
, a
->rm
);
2036 fn(cpu_env
, qn
, rm
);
2037 tcg_temp_free_ptr(qn
);
2038 tcg_temp_free_i32(rm
);
2041 gen_vpst(s
, a
->mask
);
2043 /* This insn updates predication bits */
2044 s
->base
.is_jmp
= DISAS_UPDATE_NOCHAIN
;
2049 #define DO_VCMP(INSN, FN) \
2050 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2052 static MVEGenCmpFn * const fns[] = { \
2053 gen_helper_mve_##FN##b, \
2054 gen_helper_mve_##FN##h, \
2055 gen_helper_mve_##FN##w, \
2058 return do_vcmp(s, a, fns[a->size]); \
2060 static bool trans_##INSN##_scalar(DisasContext *s, \
2061 arg_vcmp_scalar *a) \
2063 static MVEGenScalarCmpFn * const fns[] = { \
2064 gen_helper_mve_##FN##_scalarb, \
2065 gen_helper_mve_##FN##_scalarh, \
2066 gen_helper_mve_##FN##_scalarw, \
2069 return do_vcmp_scalar(s, a, fns[a->size]); \
2072 DO_VCMP(VCMPEQ
, vcmpeq
)
2073 DO_VCMP(VCMPNE
, vcmpne
)
2074 DO_VCMP(VCMPCS
, vcmpcs
)
2075 DO_VCMP(VCMPHI
, vcmphi
)
2076 DO_VCMP(VCMPGE
, vcmpge
)
2077 DO_VCMP(VCMPLT
, vcmplt
)
2078 DO_VCMP(VCMPGT
, vcmpgt
)
2079 DO_VCMP(VCMPLE
, vcmple
)
2081 #define DO_VCMP_FP(INSN, FN) \
2082 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2084 static MVEGenCmpFn * const fns[] = { \
2086 gen_helper_mve_##FN##h, \
2087 gen_helper_mve_##FN##s, \
2090 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2093 return do_vcmp(s, a, fns[a->size]); \
2095 static bool trans_##INSN##_scalar(DisasContext *s, \
2096 arg_vcmp_scalar *a) \
2098 static MVEGenScalarCmpFn * const fns[] = { \
2100 gen_helper_mve_##FN##_scalarh, \
2101 gen_helper_mve_##FN##_scalars, \
2104 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2107 return do_vcmp_scalar(s, a, fns[a->size]); \
2110 DO_VCMP_FP(VCMPEQ_fp
, vfcmpeq
)
2111 DO_VCMP_FP(VCMPNE_fp
, vfcmpne
)
2112 DO_VCMP_FP(VCMPGE_fp
, vfcmpge
)
2113 DO_VCMP_FP(VCMPLT_fp
, vfcmplt
)
2114 DO_VCMP_FP(VCMPGT_fp
, vfcmpgt
)
2115 DO_VCMP_FP(VCMPLE_fp
, vfcmple
)
2117 static bool do_vmaxv(DisasContext
*s
, arg_vmaxv
*a
, MVEGenVADDVFn fn
)
2120 * MIN/MAX operations across a vector: compute the min or
2121 * max of the initial value in a general purpose register
2122 * and all the elements in the vector, and store it back
2123 * into the general purpose register.
2128 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qm
) ||
2129 !fn
|| a
->rda
== 13 || a
->rda
== 15) {
2130 /* Rda cases are UNPREDICTABLE */
2133 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2137 qm
= mve_qreg_ptr(a
->qm
);
2138 rda
= load_reg(s
, a
->rda
);
2139 fn(rda
, cpu_env
, qm
, rda
);
2140 store_reg(s
, a
->rda
, rda
);
2141 tcg_temp_free_ptr(qm
);
2146 #define DO_VMAXV(INSN, FN) \
2147 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2149 static MVEGenVADDVFn * const fns[] = { \
2150 gen_helper_mve_##FN##b, \
2151 gen_helper_mve_##FN##h, \
2152 gen_helper_mve_##FN##w, \
2155 return do_vmaxv(s, a, fns[a->size]); \
2158 DO_VMAXV(VMAXV_S
, vmaxvs
)
2159 DO_VMAXV(VMAXV_U
, vmaxvu
)
2160 DO_VMAXV(VMAXAV
, vmaxav
)
2161 DO_VMAXV(VMINV_S
, vminvs
)
2162 DO_VMAXV(VMINV_U
, vminvu
)
2163 DO_VMAXV(VMINAV
, vminav
)
2165 #define DO_VMAXV_FP(INSN, FN) \
2166 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2168 static MVEGenVADDVFn * const fns[] = { \
2170 gen_helper_mve_##FN##h, \
2171 gen_helper_mve_##FN##s, \
2174 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2177 return do_vmaxv(s, a, fns[a->size]); \
2180 DO_VMAXV_FP(VMAXNMV
, vmaxnmv
)
2181 DO_VMAXV_FP(VMINNMV
, vminnmv
)
2182 DO_VMAXV_FP(VMAXNMAV
, vmaxnmav
)
2183 DO_VMAXV_FP(VMINNMAV
, vminnmav
)
2185 static bool do_vabav(DisasContext
*s
, arg_vabav
*a
, MVEGenVABAVFn
*fn
)
2187 /* Absolute difference accumulated across vector */
2191 if (!dc_isar_feature(aa32_mve
, s
) ||
2192 !mve_check_qreg_bank(s
, a
->qm
| a
->qn
) ||
2193 !fn
|| a
->rda
== 13 || a
->rda
== 15) {
2194 /* Rda cases are UNPREDICTABLE */
2197 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2201 qm
= mve_qreg_ptr(a
->qm
);
2202 qn
= mve_qreg_ptr(a
->qn
);
2203 rda
= load_reg(s
, a
->rda
);
2204 fn(rda
, cpu_env
, qn
, qm
, rda
);
2205 store_reg(s
, a
->rda
, rda
);
2206 tcg_temp_free_ptr(qm
);
2207 tcg_temp_free_ptr(qn
);
2212 #define DO_VABAV(INSN, FN) \
2213 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \
2215 static MVEGenVABAVFn * const fns[] = { \
2216 gen_helper_mve_##FN##b, \
2217 gen_helper_mve_##FN##h, \
2218 gen_helper_mve_##FN##w, \
2221 return do_vabav(s, a, fns[a->size]); \
2224 DO_VABAV(VABAV_S
, vabavs
)
2225 DO_VABAV(VABAV_U
, vabavu
)
2227 static bool trans_VMOV_to_2gp(DisasContext
*s
, arg_VMOV_to_2gp
*a
)
2230 * VMOV two 32-bit vector lanes to two general-purpose registers.
2231 * This insn is not predicated but it is subject to beat-wise
2232 * execution if it is not in an IT block. For us this means
2233 * only that if PSR.ECI says we should not be executing the beat
2234 * corresponding to the lane of the vector register being accessed
2235 * then we should skip perfoming the move, and that we need to do
2236 * the usual check for bad ECI state and advance of ECI state.
2237 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2242 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
) ||
2243 a
->rt
== 13 || a
->rt
== 15 || a
->rt2
== 13 || a
->rt2
== 15 ||
2245 /* Rt/Rt2 cases are UNPREDICTABLE */
2248 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2252 /* Convert Qreg index to Dreg for read_neon_element32() etc */
2255 if (!mve_skip_vmov(s
, vd
, a
->idx
, MO_32
)) {
2256 tmp
= tcg_temp_new_i32();
2257 read_neon_element32(tmp
, vd
, a
->idx
, MO_32
);
2258 store_reg(s
, a
->rt
, tmp
);
2260 if (!mve_skip_vmov(s
, vd
+ 1, a
->idx
, MO_32
)) {
2261 tmp
= tcg_temp_new_i32();
2262 read_neon_element32(tmp
, vd
+ 1, a
->idx
, MO_32
);
2263 store_reg(s
, a
->rt2
, tmp
);
2266 mve_update_and_store_eci(s
);
2270 static bool trans_VMOV_from_2gp(DisasContext
*s
, arg_VMOV_to_2gp
*a
)
2273 * VMOV two general-purpose registers to two 32-bit vector lanes.
2274 * This insn is not predicated but it is subject to beat-wise
2275 * execution if it is not in an IT block. For us this means
2276 * only that if PSR.ECI says we should not be executing the beat
2277 * corresponding to the lane of the vector register being accessed
2278 * then we should skip perfoming the move, and that we need to do
2279 * the usual check for bad ECI state and advance of ECI state.
2280 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2285 if (!dc_isar_feature(aa32_mve
, s
) || !mve_check_qreg_bank(s
, a
->qd
) ||
2286 a
->rt
== 13 || a
->rt
== 15 || a
->rt2
== 13 || a
->rt2
== 15) {
2287 /* Rt/Rt2 cases are UNPREDICTABLE */
2290 if (!mve_eci_check(s
) || !vfp_access_check(s
)) {
2294 /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2297 if (!mve_skip_vmov(s
, vd
, a
->idx
, MO_32
)) {
2298 tmp
= load_reg(s
, a
->rt
);
2299 write_neon_element32(tmp
, vd
, a
->idx
, MO_32
);
2300 tcg_temp_free_i32(tmp
);
2302 if (!mve_skip_vmov(s
, vd
+ 1, a
->idx
, MO_32
)) {
2303 tmp
= load_reg(s
, a
->rt2
);
2304 write_neon_element32(tmp
, vd
+ 1, a
->idx
, MO_32
);
2305 tcg_temp_free_i32(tmp
);
2308 mve_update_and_store_eci(s
);