Merge tag 'nvme-fixes-for-6.2-pull-request' of git://git.infradead.org/qemu-nvme...
[qemu.git] / target / arm / translate-mve.c
blob4267d43cc7c409956f6a65f3584764646cf7136f
1 /*
2 * ARM translation: M-profile MVE instructions
4 * Copyright (c) 2021 Linaro, Ltd.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/exec-all.h"
24 #include "exec/gen-icount.h"
25 #include "translate.h"
26 #include "translate-a32.h"
/*
 * Decode function for the VIDUP/VDDUP immediate field: the 2-bit
 * encoded value x selects an element increment of 1, 2, 4 or 8.
 */
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}
33 /* Include the generated decoder */
34 #include "decode-mve.c.inc"
/*
 * Function-pointer types for the generated-code callbacks used by the
 * trans_* functions below: one typedef per helper-call "shape"
 * (load/store, scatter-gather, interleaved load/store, one- and
 * two-operand ops, scalar and shift variants, accumulating ops, etc).
 */
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}
/*
 * Return a newly allocated TCG pointer temp addressing Qreg's
 * storage within CPUARMState; the caller must free it.
 */
static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}
static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}
static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     * (qmask is the bitwise OR of all the Qreg numbers an insn
     * uses, so a single < 8 test covers them all.)
     */
    return qmask < 8;
}
bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}
/* Advance the DisasContext's idea of the ECI state after a beatwise insn. */
void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        /* A0A1A2B0 wraps to A0; every other partial state completes. */
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        /* ECI occupies condexec_bits [7:4], hence the << 4 */
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}
static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved ECI values were rejected earlier by mve_eci_check() */
        g_assert_not_reached();
    }
}
/*
 * Common code for contiguous VLDR/VSTR: decode-time checks, address
 * generation (pre/post-indexed, add/subtract), the helper call which
 * performs the beatwise memory access, and base register writeback.
 * msize is the in-memory element size (MO_8/MO_16/MO_32), used to
 * scale the immediate offset. Returns false to UNDEF, true otherwise.
 */
static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        /* Pre-indexed: apply the offset before the access */
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            /* Post-indexed: apply the offset for writeback only */
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
/* Same-size contiguous VLDR/VSTR, indexed by [size][l(oad)] */
static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}
/*
 * Widening loads / narrowing stores, where the in-memory size (MSIZE)
 * is smaller than the element size in the registers: loads may be
 * signed (SLD) or unsigned (ULD); stores (ST) simply truncate.
 * The table is indexed by [u(nsigned)][l(oad)]; signed store is invalid.
 */
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
/*
 * Common code for scatter-gather loads/stores: Qm holds the per-element
 * offsets, which the helper adds to the Rn base. The register-offset
 * forms have no writeback, so the address temp is always freed.
 */
static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}
/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    /* Indexed by [os][msize][size]; NULL entries are invalid encodings */
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
        { NULL, NULL, F(vldrh_sg_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, F(vldrh_sg_os_sw), NULL },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL }
    } };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    /* Indexed by [os][msize][size]; NULL entries are invalid encodings */
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
        { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
        { NULL, NULL, F(vldrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
        { NULL, NULL, F(vldrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
    } };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    /* Indexed by [os][msize][size]; NULL entries are invalid encodings */
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
        { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
        { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
        { NULL, NULL, F(vstrw_sg_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_ud) }
    }, {
        { NULL, NULL, NULL, NULL },
        { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
        { NULL, NULL, F(vstrw_sg_os_uw), NULL },
        { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
    } };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F
/*
 * Common code for the vector-plus-immediate scatter-gather forms:
 * the base addresses come from Qm and the (msize-scaled, optionally
 * negated) immediate offset is passed to the helper as a constant.
 */
static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
/*
 * Vector-plus-immediate gather loads and scatter stores: each fns[]
 * is indexed by the W bit (0 = plain, 1 = writeback of Qm).
 * For the loads, Qd == Qm is UNPREDICTABLE and we choose to UNDEF.
 */
static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}
/*
 * Common code for the interleaving loads/stores (VLD2/VLD4/VST2/VST4):
 * calls the helper for one "pattern" of the insn, then optionally
 * writes back Rn advanced by addrinc (32 for VLD2/VST2, 64 for
 * VLD4/VST4).
 */
static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    /* These insns don't call a VPT-advancing helper, so store ECI here */
    mve_update_and_store_eci(s);
    return true;
}
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/*
 * VLD2/VST2 touch Qd..Qd+1 and VLD4/VST4 touch Qd..Qd+3, so the Qd
 * upper bounds (6 and 4 respectively) keep the whole group within
 * the Q0..Q7 bank. fns[] is indexed by [pat][size].
 */
static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F
/*
 * VDUP: duplicate general-purpose register Rt across all elements of
 * Qd. When there is no predication we can use the inline gvec dup;
 * otherwise we go through the helper so the predicate mask is honoured.
 */
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        /* Replicate the element across the 32-bit value the helper writes */
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
/*
 * Common code for one-operand vector insns (Qd = op(Qm)). If vecfn is
 * non-NULL and the insn is unpredicated we emit the inline gvec
 * implementation; otherwise we call the beatwise helper fn.
 */
static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

/* One-operand insn with no inline-vector equivalent: always use the helper */
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}
/*
 * Emit a trans function for a size-indexed one-operand insn; VECFN is
 * the optional inline gvec implementation for the unpredicated case.
 */
#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)
/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    /* Indexed by [size][u(nsigned)]; only h and s element sizes exist */
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    /* Translate the arm_fprounding value to the softfloat encoding */
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
/* VCVT{A,N,P,M}{S,U}: float-to-int with explicit rounding mode */
#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
/* VCVTB/VCVTT: single <-> half precision conversions (no size field) */
#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
/*
 * VRINT{N,A,Z,M,P}: round float to integral value with a fixed
 * rounding mode, passed to the shared helper as an immediate.
 */
#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)
/* VRINTX: round to integral, raising Inexact; uses the current FPSCR mode */
static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
/*
 * VREVnn: reverse elements within nn-bit containers. Each insn only
 * supports the element sizes smaller than its container, hence the
 * NULL entries.
 */
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

/* VMVN: bitwise NOT; size-agnostic, so a single helper and gvec fn suffice */
static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}
/* Float VABS/VNEG: only half and single element sizes are valid */
static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}
/*
 * Common code for two-operand vector insns (Qd = op(Qn, Qm)). If
 * vecfn is non-NULL and the insn is unpredicated we emit the inline
 * gvec implementation; otherwise we call the beatwise helper fn.
 */
static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

/* Two-operand insn with no inline-vector equivalent: always use the helper */
static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}
/* Bitwise logic ops: size-agnostic, with an inline gvec equivalent */
#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}
/*
 * Emit a trans function for a size-indexed two-operand integer insn;
 * VECFN is the optional inline gvec implementation for the
 * unpredicated case.
 */
#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)
/*
 * VQDMULLB/VQDMULLT: saturating doubling multiply long (bottom/top).
 * Only h and w element sizes exist; the overlapping-register case at
 * MO_32 is UNPREDICTABLE and we choose to UNDEF.
 */
static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}
static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}
/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    /* If beat A0 is skipped, fall back to the FPSCR.C-input form */
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    /* If beat A0 is skipped, fall back to the FPSCR.C-input form */
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
/*
 * Two-operand floating-point insns: only half and single element
 * sizes are valid, and the MVE-with-FP feature is required.
 */
#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)
/*
 * Common code for two-operand-with-scalar insns (Qd = op(Qn, Rm)):
 * the scalar operand comes from a general-purpose register, which
 * the helper duplicates across the lanes.
 */
static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}
/* Emit a trans function for a size-indexed two-operand-with-scalar insn */
#define DO_2OP_SCALAR(INSN, FN)                                         \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)           \
    {                                                                   \
        static MVEGenTwoOpScalarFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2op_scalar(s, a, fns[a->size]);                       \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
/*
 * VQDMULLB/VQDMULLT (scalar): as for the vector forms, only h and w
 * element sizes exist, and Qd == Qn at MO_32 is UNPREDICTABLE.
 */
static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}
/*
 * Floating-point two-operand-with-scalar insns: half and single
 * element sizes only, and the MVE-with-FP feature is required.
 */
#define DO_2OP_FP_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)           \
    {                                                                   \
        static MVEGenTwoOpScalarFn * const fns[] = {                    \
            NULL,                                                       \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##s,                                     \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_2op_scalar(s, a, fns[a->size]);                       \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
/*
 * Common code for the long dual-accumulate insns (VMLALDAV etc):
 * the 64-bit accumulator lives in the RdaLo/RdaHi register pair,
 * which is loaded before and stored after the helper call.
 */
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
1229 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
1231 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1232 { NULL, NULL },
1233 { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
1234 { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
1235 { NULL, NULL },
1237 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1240 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
1242 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1243 { NULL, NULL },
1244 { gen_helper_mve_vmlaldavuh, NULL },
1245 { gen_helper_mve_vmlaldavuw, NULL },
1246 { NULL, NULL },
1248 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1251 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
1253 static MVEGenLongDualAccOpFn * const fns[4][2] = {
1254 { NULL, NULL },
1255 { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
1256 { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
1257 { NULL, NULL },
1259 return do_long_dual_acc(s, a, fns[a->size][a->x]);
1262 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
1264 static MVEGenLongDualAccOpFn * const fns[] = {
1265 gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
1267 return do_long_dual_acc(s, a, fns[a->x]);
1270 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
1272 static MVEGenLongDualAccOpFn * const fns[] = {
1273 gen_helper_mve_vrmlaldavhuw, NULL,
1275 return do_long_dual_acc(s, a, fns[a->x]);
1278 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
1280 static MVEGenLongDualAccOpFn * const fns[] = {
1281 gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
1283 return do_long_dual_acc(s, a, fns[a->x]);
1286 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
1288 TCGv_ptr qn, qm;
1289 TCGv_i32 rda;
1291 if (!dc_isar_feature(aa32_mve, s) ||
1292 !mve_check_qreg_bank(s, a->qn) ||
1293 !fn) {
1294 return false;
1296 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1297 return true;
1300 qn = mve_qreg_ptr(a->qn);
1301 qm = mve_qreg_ptr(a->qm);
1304 * This insn is subject to beat-wise execution. Partial execution
1305 * of an A=0 (no-accumulate) insn which does not execute the first
1306 * beat must start with the current rda value, not 0.
1308 if (a->a || mve_skip_first_beat(s)) {
1309 rda = load_reg(s, a->rda);
1310 } else {
1311 rda = tcg_const_i32(0);
1314 fn(rda, cpu_env, qn, qm, rda);
1315 store_reg(s, a->rda, rda);
1316 tcg_temp_free_ptr(qn);
1317 tcg_temp_free_ptr(qm);
1319 mve_update_eci(s);
1320 return true;
1323 #define DO_DUAL_ACC(INSN, FN) \
1324 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \
1326 static MVEGenDualAccOpFn * const fns[4][2] = { \
1327 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
1328 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
1329 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
1330 { NULL, NULL }, \
1331 }; \
1332 return do_dual_acc(s, a, fns[a->size][a->x]); \
1335 DO_DUAL_ACC(VMLADAV_S, vmladavs)
1336 DO_DUAL_ACC(VMLSDAV, vmlsdav)
1338 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
1340 static MVEGenDualAccOpFn * const fns[4][2] = {
1341 { gen_helper_mve_vmladavub, NULL },
1342 { gen_helper_mve_vmladavuh, NULL },
1343 { gen_helper_mve_vmladavuw, NULL },
1344 { NULL, NULL },
1346 return do_dual_acc(s, a, fns[a->size][a->x]);
1349 static void gen_vpst(DisasContext *s, uint32_t mask)
1352 * Set the VPR mask fields. We take advantage of MASK01 and MASK23
1353 * being adjacent fields in the register.
1355 * Updating the masks is not predicated, but it is subject to beat-wise
1356 * execution, and the mask is updated on the odd-numbered beats.
1357 * So if PSR.ECI says we should skip beat 1, we mustn't update the
1358 * 01 mask field.
1360 TCGv_i32 vpr = load_cpu_field(v7m.vpr);
1361 switch (s->eci) {
1362 case ECI_NONE:
1363 case ECI_A0:
1364 /* Update both 01 and 23 fields */
1365 tcg_gen_deposit_i32(vpr, vpr,
1366 tcg_constant_i32(mask | (mask << 4)),
1367 R_V7M_VPR_MASK01_SHIFT,
1368 R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
1369 break;
1370 case ECI_A0A1:
1371 case ECI_A0A1A2:
1372 case ECI_A0A1A2B0:
1373 /* Update only the 23 mask field */
1374 tcg_gen_deposit_i32(vpr, vpr,
1375 tcg_constant_i32(mask),
1376 R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
1377 break;
1378 default:
1379 g_assert_not_reached();
1381 store_cpu_field(vpr, v7m.vpr);
1384 static bool trans_VPST(DisasContext *s, arg_VPST *a)
1386 /* mask == 0 is a "related encoding" */
1387 if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
1388 return false;
1390 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1391 return true;
1393 gen_vpst(s, a->mask);
1394 mve_update_and_store_eci(s);
1395 return true;
1398 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
1401 * Invert the predicate in VPR.P0. We have call out to
1402 * a helper because this insn itself is beatwise and can
1403 * be predicated.
1405 if (!dc_isar_feature(aa32_mve, s)) {
1406 return false;
1408 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1409 return true;
1412 gen_helper_mve_vpnot(cpu_env);
1413 /* This insn updates predication bits */
1414 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1415 mve_update_eci(s);
1416 return true;
1419 static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
1421 /* VADDV: vector add across vector */
1422 static MVEGenVADDVFn * const fns[4][2] = {
1423 { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
1424 { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
1425 { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
1426 { NULL, NULL }
1428 TCGv_ptr qm;
1429 TCGv_i32 rda;
1431 if (!dc_isar_feature(aa32_mve, s) ||
1432 a->size == 3) {
1433 return false;
1435 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1436 return true;
1440 * This insn is subject to beat-wise execution. Partial execution
1441 * of an A=0 (no-accumulate) insn which does not execute the first
1442 * beat must start with the current value of Rda, not zero.
1444 if (a->a || mve_skip_first_beat(s)) {
1445 /* Accumulate input from Rda */
1446 rda = load_reg(s, a->rda);
1447 } else {
1448 /* Accumulate starting at zero */
1449 rda = tcg_const_i32(0);
1452 qm = mve_qreg_ptr(a->qm);
1453 fns[a->size][a->u](rda, cpu_env, qm, rda);
1454 store_reg(s, a->rda, rda);
1455 tcg_temp_free_ptr(qm);
1457 mve_update_eci(s);
1458 return true;
1461 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
1464 * Vector Add Long Across Vector: accumulate the 32-bit
1465 * elements of the vector into a 64-bit result stored in
1466 * a pair of general-purpose registers.
1467 * No need to check Qm's bank: it is only 3 bits in decode.
1469 TCGv_ptr qm;
1470 TCGv_i64 rda;
1471 TCGv_i32 rdalo, rdahi;
1473 if (!dc_isar_feature(aa32_mve, s)) {
1474 return false;
1477 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
1478 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
1480 if (a->rdahi == 13 || a->rdahi == 15) {
1481 return false;
1483 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1484 return true;
1488 * This insn is subject to beat-wise execution. Partial execution
1489 * of an A=0 (no-accumulate) insn which does not execute the first
1490 * beat must start with the current value of RdaHi:RdaLo, not zero.
1492 if (a->a || mve_skip_first_beat(s)) {
1493 /* Accumulate input from RdaHi:RdaLo */
1494 rda = tcg_temp_new_i64();
1495 rdalo = load_reg(s, a->rdalo);
1496 rdahi = load_reg(s, a->rdahi);
1497 tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
1498 tcg_temp_free_i32(rdalo);
1499 tcg_temp_free_i32(rdahi);
1500 } else {
1501 /* Accumulate starting at zero */
1502 rda = tcg_const_i64(0);
1505 qm = mve_qreg_ptr(a->qm);
1506 if (a->u) {
1507 gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
1508 } else {
1509 gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
1511 tcg_temp_free_ptr(qm);
1513 rdalo = tcg_temp_new_i32();
1514 rdahi = tcg_temp_new_i32();
1515 tcg_gen_extrl_i64_i32(rdalo, rda);
1516 tcg_gen_extrh_i64_i32(rdahi, rda);
1517 store_reg(s, a->rdalo, rdalo);
1518 store_reg(s, a->rdahi, rdahi);
1519 tcg_temp_free_i64(rda);
1520 mve_update_eci(s);
1521 return true;
1524 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
1525 GVecGen2iFn *vecfn)
1527 TCGv_ptr qd;
1528 uint64_t imm;
1530 if (!dc_isar_feature(aa32_mve, s) ||
1531 !mve_check_qreg_bank(s, a->qd) ||
1532 !fn) {
1533 return false;
1535 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1536 return true;
1539 imm = asimd_imm_const(a->imm, a->cmode, a->op);
1541 if (vecfn && mve_no_predication(s)) {
1542 vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
1543 imm, 16, 16);
1544 } else {
1545 qd = mve_qreg_ptr(a->qd);
1546 fn(cpu_env, qd, tcg_constant_i64(imm));
1547 tcg_temp_free_ptr(qd);
1549 mve_update_eci(s);
1550 return true;
1553 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
1554 int64_t c, uint32_t oprsz, uint32_t maxsz)
1556 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
1559 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
1561 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1562 MVEGenOneOpImmFn *fn;
1563 GVecGen2iFn *vecfn;
1565 if ((a->cmode & 1) && a->cmode < 12) {
1566 if (a->op) {
1568 * For op=1, the immediate will be inverted by asimd_imm_const(),
1569 * so the VBIC becomes a logical AND operation.
1571 fn = gen_helper_mve_vandi;
1572 vecfn = tcg_gen_gvec_andi;
1573 } else {
1574 fn = gen_helper_mve_vorri;
1575 vecfn = tcg_gen_gvec_ori;
1577 } else {
1578 /* There is one unallocated cmode/op combination in this space */
1579 if (a->cmode == 15 && a->op == 1) {
1580 return false;
1582 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
1583 fn = gen_helper_mve_vmovi;
1584 vecfn = gen_gvec_vmovi;
1586 return do_1imm(s, a, fn, vecfn);
1589 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1590 bool negateshift, GVecGen2iFn vecfn)
1592 TCGv_ptr qd, qm;
1593 int shift = a->shift;
1595 if (!dc_isar_feature(aa32_mve, s) ||
1596 !mve_check_qreg_bank(s, a->qd | a->qm) ||
1597 !fn) {
1598 return false;
1600 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1601 return true;
1605 * When we handle a right shift insn using a left-shift helper
1606 * which permits a negative shift count to indicate a right-shift,
1607 * we must negate the shift count.
1609 if (negateshift) {
1610 shift = -shift;
1613 if (vecfn && mve_no_predication(s)) {
1614 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
1615 shift, 16, 16);
1616 } else {
1617 qd = mve_qreg_ptr(a->qd);
1618 qm = mve_qreg_ptr(a->qm);
1619 fn(cpu_env, qd, qm, tcg_constant_i32(shift));
1620 tcg_temp_free_ptr(qd);
1621 tcg_temp_free_ptr(qm);
1623 mve_update_eci(s);
1624 return true;
1627 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
1628 bool negateshift)
1630 return do_2shift_vec(s, a, fn, negateshift, NULL);
#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
1648 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
1649 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1652 * We get here with a negated shift count, and we must handle
1653 * shifts by the element size, which tcg_gen_gvec_sari() does not do.
1655 shift = -shift;
1656 if (shift == (8 << vece)) {
1657 shift--;
1659 tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
1662 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
1663 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1666 * We get here with a negated shift count, and we must handle
1667 * shifts by the element size, which tcg_gen_gvec_shri() does not do.
1669 shift = -shift;
1670 if (shift == (8 << vece)) {
1671 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
1672 } else {
1673 tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
1677 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
1678 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
1679 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
1680 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
1681 /* These right shifts use a left-shift helper with negated shift count */
1682 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
1683 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
1684 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
1685 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
1687 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
1688 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
1690 #define DO_2SHIFT_FP(INSN, FN) \
1691 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1693 if (!dc_isar_feature(aa32_mve_fp, s)) { \
1694 return false; \
1696 return do_2shift(s, a, gen_helper_mve_##FN, false); \
1699 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
1700 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
1701 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
1702 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
1703 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
1704 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
1705 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
1706 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
1708 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
1709 MVEGenTwoOpShiftFn *fn)
1711 TCGv_ptr qda;
1712 TCGv_i32 rm;
1714 if (!dc_isar_feature(aa32_mve, s) ||
1715 !mve_check_qreg_bank(s, a->qda) ||
1716 a->rm == 13 || a->rm == 15 || !fn) {
1717 /* Rm cases are UNPREDICTABLE */
1718 return false;
1720 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1721 return true;
1724 qda = mve_qreg_ptr(a->qda);
1725 rm = load_reg(s, a->rm);
1726 fn(cpu_env, qda, qda, rm);
1727 tcg_temp_free_ptr(qda);
1728 tcg_temp_free_i32(rm);
1729 mve_update_eci(s);
1730 return true;
1733 #define DO_2SHIFT_SCALAR(INSN, FN) \
1734 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
1736 static MVEGenTwoOpShiftFn * const fns[] = { \
1737 gen_helper_mve_##FN##b, \
1738 gen_helper_mve_##FN##h, \
1739 gen_helper_mve_##FN##w, \
1740 NULL, \
1741 }; \
1742 return do_2shift_scalar(s, a, fns[a->size]); \
1745 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
1746 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
1747 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
1748 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
1749 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
1750 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
1751 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
1752 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
1754 #define DO_VSHLL(INSN, FN) \
1755 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1757 static MVEGenTwoOpShiftFn * const fns[] = { \
1758 gen_helper_mve_##FN##b, \
1759 gen_helper_mve_##FN##h, \
1760 }; \
1761 return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
1765 * For the VSHLL vector helpers, the vece is the size of the input
1766 * (ie MO_8 or MO_16); the helpers want to work in the output size.
1767 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
1769 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
1770 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1772 unsigned ovece = vece + 1;
1773 unsigned ibits = vece == MO_8 ? 8 : 16;
1774 tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
1775 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1778 static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
1779 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1781 unsigned ovece = vece + 1;
1782 tcg_gen_gvec_andi(ovece, dofs, aofs,
1783 ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
1784 tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
1787 static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
1788 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1790 unsigned ovece = vece + 1;
1791 unsigned ibits = vece == MO_8 ? 8 : 16;
1792 if (shift == 0) {
1793 tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
1794 } else {
1795 tcg_gen_gvec_andi(ovece, dofs, aofs,
1796 ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1797 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1801 static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
1802 int64_t shift, uint32_t oprsz, uint32_t maxsz)
1804 unsigned ovece = vece + 1;
1805 unsigned ibits = vece == MO_8 ? 8 : 16;
1806 if (shift == 0) {
1807 tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
1808 } else {
1809 tcg_gen_gvec_andi(ovece, dofs, aofs,
1810 ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
1811 tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
1815 DO_VSHLL(VSHLL_BS, vshllbs)
1816 DO_VSHLL(VSHLL_BU, vshllbu)
1817 DO_VSHLL(VSHLL_TS, vshllts)
1818 DO_VSHLL(VSHLL_TU, vshlltu)
1820 #define DO_2SHIFT_N(INSN, FN) \
1821 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
1823 static MVEGenTwoOpShiftFn * const fns[] = { \
1824 gen_helper_mve_##FN##b, \
1825 gen_helper_mve_##FN##h, \
1826 }; \
1827 return do_2shift(s, a, fns[a->size], false); \
1830 DO_2SHIFT_N(VSHRNB, vshrnb)
1831 DO_2SHIFT_N(VSHRNT, vshrnt)
1832 DO_2SHIFT_N(VRSHRNB, vrshrnb)
1833 DO_2SHIFT_N(VRSHRNT, vrshrnt)
1834 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
1835 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
1836 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
1837 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
1838 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
1839 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
1840 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
1841 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
1842 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
1843 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
1844 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
1845 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
1847 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
1850 * Whole Vector Left Shift with Carry. The carry is taken
1851 * from a general purpose register and written back there.
1852 * An imm of 0 means "shift by 32".
1854 TCGv_ptr qd;
1855 TCGv_i32 rdm;
1857 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1858 return false;
1860 if (a->rdm == 13 || a->rdm == 15) {
1861 /* CONSTRAINED UNPREDICTABLE: we UNDEF */
1862 return false;
1864 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1865 return true;
1868 qd = mve_qreg_ptr(a->qd);
1869 rdm = load_reg(s, a->rdm);
1870 gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
1871 store_reg(s, a->rdm, rdm);
1872 tcg_temp_free_ptr(qd);
1873 mve_update_eci(s);
1874 return true;
1877 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
1879 TCGv_ptr qd;
1880 TCGv_i32 rn;
1883 * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
1884 * This fills the vector with elements of successively increasing
1885 * or decreasing values, starting from Rn.
1887 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1888 return false;
1890 if (a->size == MO_64) {
1891 /* size 0b11 is another encoding */
1892 return false;
1894 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1895 return true;
1898 qd = mve_qreg_ptr(a->qd);
1899 rn = load_reg(s, a->rn);
1900 fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
1901 store_reg(s, a->rn, rn);
1902 tcg_temp_free_ptr(qd);
1903 mve_update_eci(s);
1904 return true;
1907 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
1909 TCGv_ptr qd;
1910 TCGv_i32 rn, rm;
1913 * Vector increment/decrement with wrap and duplicate (VIWDUp, VDWDUP)
1914 * This fills the vector with elements of successively increasing
1915 * or decreasing values, starting from Rn. Rm specifies a point where
1916 * the count wraps back around to 0. The updated offset is written back
1917 * to Rn.
1919 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
1920 return false;
1922 if (!fn || a->rm == 13 || a->rm == 15) {
1924 * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
1925 * Rm == 13 is VIWDUP, VDWDUP.
1927 return false;
1929 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1930 return true;
1933 qd = mve_qreg_ptr(a->qd);
1934 rn = load_reg(s, a->rn);
1935 rm = load_reg(s, a->rm);
1936 fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
1937 store_reg(s, a->rn, rn);
1938 tcg_temp_free_ptr(qd);
1939 tcg_temp_free_i32(rm);
1940 mve_update_eci(s);
1941 return true;
1944 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
1946 static MVEGenVIDUPFn * const fns[] = {
1947 gen_helper_mve_vidupb,
1948 gen_helper_mve_viduph,
1949 gen_helper_mve_vidupw,
1950 NULL,
1952 return do_vidup(s, a, fns[a->size]);
1955 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
1957 static MVEGenVIDUPFn * const fns[] = {
1958 gen_helper_mve_vidupb,
1959 gen_helper_mve_viduph,
1960 gen_helper_mve_vidupw,
1961 NULL,
1963 /* VDDUP is just like VIDUP but with a negative immediate */
1964 a->imm = -a->imm;
1965 return do_vidup(s, a, fns[a->size]);
1968 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
1970 static MVEGenVIWDUPFn * const fns[] = {
1971 gen_helper_mve_viwdupb,
1972 gen_helper_mve_viwduph,
1973 gen_helper_mve_viwdupw,
1974 NULL,
1976 return do_viwdup(s, a, fns[a->size]);
1979 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
1981 static MVEGenVIWDUPFn * const fns[] = {
1982 gen_helper_mve_vdwdupb,
1983 gen_helper_mve_vdwduph,
1984 gen_helper_mve_vdwdupw,
1985 NULL,
1987 return do_viwdup(s, a, fns[a->size]);
1990 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
1992 TCGv_ptr qn, qm;
1994 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
1995 !fn) {
1996 return false;
1998 if (!mve_eci_check(s) || !vfp_access_check(s)) {
1999 return true;
2002 qn = mve_qreg_ptr(a->qn);
2003 qm = mve_qreg_ptr(a->qm);
2004 fn(cpu_env, qn, qm);
2005 tcg_temp_free_ptr(qn);
2006 tcg_temp_free_ptr(qm);
2007 if (a->mask) {
2008 /* VPT */
2009 gen_vpst(s, a->mask);
2011 /* This insn updates predication bits */
2012 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2013 mve_update_eci(s);
2014 return true;
2017 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
2018 MVEGenScalarCmpFn *fn)
2020 TCGv_ptr qn;
2021 TCGv_i32 rm;
2023 if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
2024 return false;
2026 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2027 return true;
2030 qn = mve_qreg_ptr(a->qn);
2031 if (a->rm == 15) {
2032 /* Encoding Rm=0b1111 means "constant zero" */
2033 rm = tcg_constant_i32(0);
2034 } else {
2035 rm = load_reg(s, a->rm);
2037 fn(cpu_env, qn, rm);
2038 tcg_temp_free_ptr(qn);
2039 tcg_temp_free_i32(rm);
2040 if (a->mask) {
2041 /* VPT */
2042 gen_vpst(s, a->mask);
2044 /* This insn updates predication bits */
2045 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2046 mve_update_eci(s);
2047 return true;
2050 #define DO_VCMP(INSN, FN) \
2051 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2053 static MVEGenCmpFn * const fns[] = { \
2054 gen_helper_mve_##FN##b, \
2055 gen_helper_mve_##FN##h, \
2056 gen_helper_mve_##FN##w, \
2057 NULL, \
2058 }; \
2059 return do_vcmp(s, a, fns[a->size]); \
2061 static bool trans_##INSN##_scalar(DisasContext *s, \
2062 arg_vcmp_scalar *a) \
2064 static MVEGenScalarCmpFn * const fns[] = { \
2065 gen_helper_mve_##FN##_scalarb, \
2066 gen_helper_mve_##FN##_scalarh, \
2067 gen_helper_mve_##FN##_scalarw, \
2068 NULL, \
2069 }; \
2070 return do_vcmp_scalar(s, a, fns[a->size]); \
2073 DO_VCMP(VCMPEQ, vcmpeq)
2074 DO_VCMP(VCMPNE, vcmpne)
2075 DO_VCMP(VCMPCS, vcmpcs)
2076 DO_VCMP(VCMPHI, vcmphi)
2077 DO_VCMP(VCMPGE, vcmpge)
2078 DO_VCMP(VCMPLT, vcmplt)
2079 DO_VCMP(VCMPGT, vcmpgt)
2080 DO_VCMP(VCMPLE, vcmple)
2082 #define DO_VCMP_FP(INSN, FN) \
2083 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
2085 static MVEGenCmpFn * const fns[] = { \
2086 NULL, \
2087 gen_helper_mve_##FN##h, \
2088 gen_helper_mve_##FN##s, \
2089 NULL, \
2090 }; \
2091 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2092 return false; \
2094 return do_vcmp(s, a, fns[a->size]); \
2096 static bool trans_##INSN##_scalar(DisasContext *s, \
2097 arg_vcmp_scalar *a) \
2099 static MVEGenScalarCmpFn * const fns[] = { \
2100 NULL, \
2101 gen_helper_mve_##FN##_scalarh, \
2102 gen_helper_mve_##FN##_scalars, \
2103 NULL, \
2104 }; \
2105 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2106 return false; \
2108 return do_vcmp_scalar(s, a, fns[a->size]); \
2111 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
2112 DO_VCMP_FP(VCMPNE_fp, vfcmpne)
2113 DO_VCMP_FP(VCMPGE_fp, vfcmpge)
2114 DO_VCMP_FP(VCMPLT_fp, vfcmplt)
2115 DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
2116 DO_VCMP_FP(VCMPLE_fp, vfcmple)
2118 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
2121 * MIN/MAX operations across a vector: compute the min or
2122 * max of the initial value in a general purpose register
2123 * and all the elements in the vector, and store it back
2124 * into the general purpose register.
2126 TCGv_ptr qm;
2127 TCGv_i32 rda;
2129 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
2130 !fn || a->rda == 13 || a->rda == 15) {
2131 /* Rda cases are UNPREDICTABLE */
2132 return false;
2134 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2135 return true;
2138 qm = mve_qreg_ptr(a->qm);
2139 rda = load_reg(s, a->rda);
2140 fn(rda, cpu_env, qm, rda);
2141 store_reg(s, a->rda, rda);
2142 tcg_temp_free_ptr(qm);
2143 mve_update_eci(s);
2144 return true;
2147 #define DO_VMAXV(INSN, FN) \
2148 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2150 static MVEGenVADDVFn * const fns[] = { \
2151 gen_helper_mve_##FN##b, \
2152 gen_helper_mve_##FN##h, \
2153 gen_helper_mve_##FN##w, \
2154 NULL, \
2155 }; \
2156 return do_vmaxv(s, a, fns[a->size]); \
2159 DO_VMAXV(VMAXV_S, vmaxvs)
2160 DO_VMAXV(VMAXV_U, vmaxvu)
2161 DO_VMAXV(VMAXAV, vmaxav)
2162 DO_VMAXV(VMINV_S, vminvs)
2163 DO_VMAXV(VMINV_U, vminvu)
2164 DO_VMAXV(VMINAV, vminav)
2166 #define DO_VMAXV_FP(INSN, FN) \
2167 static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
2169 static MVEGenVADDVFn * const fns[] = { \
2170 NULL, \
2171 gen_helper_mve_##FN##h, \
2172 gen_helper_mve_##FN##s, \
2173 NULL, \
2174 }; \
2175 if (!dc_isar_feature(aa32_mve_fp, s)) { \
2176 return false; \
2178 return do_vmaxv(s, a, fns[a->size]); \
2181 DO_VMAXV_FP(VMAXNMV, vmaxnmv)
2182 DO_VMAXV_FP(VMINNMV, vminnmv)
2183 DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
2184 DO_VMAXV_FP(VMINNMAV, vminnmav)
2186 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
2188 /* Absolute difference accumulated across vector */
2189 TCGv_ptr qn, qm;
2190 TCGv_i32 rda;
2192 if (!dc_isar_feature(aa32_mve, s) ||
2193 !mve_check_qreg_bank(s, a->qm | a->qn) ||
2194 !fn || a->rda == 13 || a->rda == 15) {
2195 /* Rda cases are UNPREDICTABLE */
2196 return false;
2198 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2199 return true;
2202 qm = mve_qreg_ptr(a->qm);
2203 qn = mve_qreg_ptr(a->qn);
2204 rda = load_reg(s, a->rda);
2205 fn(rda, cpu_env, qn, qm, rda);
2206 store_reg(s, a->rda, rda);
2207 tcg_temp_free_ptr(qm);
2208 tcg_temp_free_ptr(qn);
2209 mve_update_eci(s);
2210 return true;
2213 #define DO_VABAV(INSN, FN) \
2214 static bool trans_##INSN(DisasContext *s, arg_vabav *a) \
2216 static MVEGenVABAVFn * const fns[] = { \
2217 gen_helper_mve_##FN##b, \
2218 gen_helper_mve_##FN##h, \
2219 gen_helper_mve_##FN##w, \
2220 NULL, \
2221 }; \
2222 return do_vabav(s, a, fns[a->size]); \
2225 DO_VABAV(VABAV_S, vabavs)
2226 DO_VABAV(VABAV_U, vabavu)
2228 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2231 * VMOV two 32-bit vector lanes to two general-purpose registers.
2232 * This insn is not predicated but it is subject to beat-wise
2233 * execution if it is not in an IT block. For us this means
2234 * only that if PSR.ECI says we should not be executing the beat
2235 * corresponding to the lane of the vector register being accessed
2236 * then we should skip perfoming the move, and that we need to do
2237 * the usual check for bad ECI state and advance of ECI state.
2238 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2240 TCGv_i32 tmp;
2241 int vd;
2243 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2244 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
2245 a->rt == a->rt2) {
2246 /* Rt/Rt2 cases are UNPREDICTABLE */
2247 return false;
2249 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2250 return true;
2253 /* Convert Qreg index to Dreg for read_neon_element32() etc */
2254 vd = a->qd * 2;
2256 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2257 tmp = tcg_temp_new_i32();
2258 read_neon_element32(tmp, vd, a->idx, MO_32);
2259 store_reg(s, a->rt, tmp);
2261 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2262 tmp = tcg_temp_new_i32();
2263 read_neon_element32(tmp, vd + 1, a->idx, MO_32);
2264 store_reg(s, a->rt2, tmp);
2267 mve_update_and_store_eci(s);
2268 return true;
2271 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
2274 * VMOV two general-purpose registers to two 32-bit vector lanes.
2275 * This insn is not predicated but it is subject to beat-wise
2276 * execution if it is not in an IT block. For us this means
2277 * only that if PSR.ECI says we should not be executing the beat
2278 * corresponding to the lane of the vector register being accessed
2279 * then we should skip perfoming the move, and that we need to do
2280 * the usual check for bad ECI state and advance of ECI state.
2281 * (If PSR.ECI is non-zero then we cannot be in an IT block.)
2283 TCGv_i32 tmp;
2284 int vd;
2286 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
2287 a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
2288 /* Rt/Rt2 cases are UNPREDICTABLE */
2289 return false;
2291 if (!mve_eci_check(s) || !vfp_access_check(s)) {
2292 return true;
2295 /* Convert Qreg idx to Dreg for read_neon_element32() etc */
2296 vd = a->qd * 2;
2298 if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
2299 tmp = load_reg(s, a->rt);
2300 write_neon_element32(tmp, vd, a->idx, MO_32);
2301 tcg_temp_free_i32(tmp);
2303 if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
2304 tmp = load_reg(s, a->rt2);
2305 write_neon_element32(tmp, vd + 1, a->idx, MO_32);
2306 tcg_temp_free_i32(tmp);
2309 mve_update_and_store_eci(s);
2310 return true;