/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"
static inline int vidup_imm(DisasContext *s, int x)

/* Include the generated decoder */
#include "decode-mve.c.inc"
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
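/*
 * Note: these typedefs are meant to match the prototypes of the
 * generated MVE helpers (declared via DEF_HELPER in helper-mve.h);
 * if a helper's argument list changes, the corresponding typedef
 * here has to change with it.
 */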
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}
static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}
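/*
 * (s->mve_no_pred is filled in at the start of translation from the
 * TB flags; roughly, it is true only when VPR predication and loop
 * tail predication are known to be inactive, so no lanes can be
 * masked and the inline gvec expansion is safe.)
 */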
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}
void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
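/*
 * For example: ECI_A0A1A2B0 means beat 0 of the *next* insn has also
 * already been executed, so once this insn completes the new state is
 * ECI_A0; any other non-zero value only covers beats of this insn, so
 * the state returns to ECI_NONE.
 */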
void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    mve_update_eci(s);
    store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
}
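/*
 * (The ECI value lives in the upper bits of the condexec_bits field,
 * hence the << 4 here; this has to stay in sync with how the field is
 * unpacked at the start of translation.)
 */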
static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
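/*
 * The immediate offset is in units of the memory element size, so for
 * example imm 2 with msize MO_16 becomes a byte offset of 4.
 */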
static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)  \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
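/*
 * These wide/narrow forms are the VLDRB.S16/U16, VLDRB.S32/U32 and
 * VLDRH.S32/U32 encodings (and the corresponding VSTR forms), where
 * the in-memory element (MSIZE) is narrower than the vector element:
 * loads sign- or zero-extend, stores truncate.
 */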
static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N
/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}
static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}
static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
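/*
 * addrinc is the total number of bytes consumed by the whole
 * (de)interleaving operation: the callers below pass 32 for
 * VLD2/VST2 and 64 for VLD4/VST4.
 */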
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)      \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)      \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)      \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)
static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)      \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}
static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}
#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)      \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}
#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)      \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}
/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)      \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)  \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}
#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)  \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}
static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)          \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}
static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
        tcg_temp_free_ptr(qd);
    }
    mve_update_eci(s);
    return true;
}
static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}
static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)           \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}
DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)   \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)       \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)           \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
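/*
 * For example, VSHLLB.S8 with shift 2 takes the even-numbered (bottom)
 * byte of each halfword lane, sign-extends it to 16 bits and shifts it
 * left by 2; the T forms below operate on the odd-numbered (top) input
 * elements instead.
 */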
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)
#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)   \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }
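/*
 * The narrowing shifts only come in byte and halfword forms: a->size
 * here is the size of the narrowed output element, and the input
 * elements are twice that width.
 */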
DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}
static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is the VIDUP, VDDUP encoding.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}
static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}
static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}
#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)     \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)     \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)
static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)    \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)    \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)
static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)    \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)
static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}