/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

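/*
 * Decodetree !function used by mve.decode: the encoded immediate
 * field for VIDUP/VDDUP and friends selects a step size of 1, 2, 4 or 8.
 */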
static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

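/*
 * Function pointer types matching the prototypes of the generated
 * gen_helper_mve_* functions; each trans_* function below picks the
 * right helper for the element size out of an array of these.
 */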
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

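/*
 * Callers OR together all the Q register numbers an insn uses,
 * e.g. mve_check_qreg_bank(s, a->qd | a->qm), so any register
 * above Q7 makes the combined mask fail the check.
 */
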
bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized());
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

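/*
 * The "<< 4" above is because ECI is kept in bits [7:4] of the condexec
 * field; the translator recovers it with "condexec >> 4" at the start
 * of the TB.
 */
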
static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}

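/*
 * In do_ldst() the immediate is scaled by the memory element size:
 * e.g. a VLDRH with imm 4 (msize MO_16) addresses a byte offset of 8.
 */
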
static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

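/*
 * The DO_VLDST_WIDE_NARROW pattern below handles the widening loads and
 * narrowing stores: per the helper naming, e.g. vldrb_sh loads bytes from
 * memory and sign-extends them to halfword elements, while vstrb_h narrows
 * halfword elements to bytes; MSIZE is the memory element size.
 */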
#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)                   \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)           \
    {                                                                   \
        static MVEGenLdStFn * const ldstfns[2][2] = {                   \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },              \
            { NULL, gen_helper_mve_##ULD },                             \
        };                                                              \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);               \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,           NULL,              NULL },
            { NULL, NULL,           F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,           NULL,              NULL },
            { NULL, NULL,           NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

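/*
 * In the four insns above, fns[a->w] selects the _wb_ helper variant
 * when writeback is requested; those also write the incremented
 * addresses back to the address vector Qm.
 */
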
static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

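/*
 * VLD2/VST2 access Qd and Qd+1, so the trans functions below require
 * qd <= 6; VLD4/VST4 access Qd..Qd+3 and require qd <= 4. The addrinc
 * values of 32 and 64 are the total bytes transferred by a complete
 * two- or four-register (de)interleave.
 */
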
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}

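/*
 * Throughout this file the "16, 16" arguments to the inline gvec calls
 * are oprsz and maxsz: an MVE vector is always exactly 16 bytes.
 */
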
static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_1op_vec(s, a, fns[a->size], VECFN);                   \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

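/*
 * Architecturally the "B" (bottom) forms write the even-numbered
 * narrow elements of Qd and the "T" (top) forms the odd-numbered
 * ones; the other elements of Qd are left unchanged.
 */
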
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)

/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */

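/*
 * Hence in the trans functions below, when PSR.ECI says the first beat
 * has already executed, VADCI and VSBCI are handled as VADC and VSBC so
 * that the carry-in comes from FPSCR.C rather than the fixed constant.
 */
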
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

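/*
 * RdaLo holds bits [31:0] of the 64-bit accumulator and RdaHi bits
 * [63:32], matching the tcg_gen_concat_i32_i64() and extrl/extrh
 * pairs above.
 */
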
static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
        tcg_temp_free_ptr(qd);
    }
    mve_update_eci(s);
    return true;
}

static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}

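/*
 * asimd_imm_const() is shared with the Neon "one register and a
 * modified immediate" decode, which uses the same cmode/op encoding.
 */
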
static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)

static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}

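/*
 * Shifting right by the full element size is out of range for the gvec
 * shift operations, but an arithmetic shift by one bit less produces
 * the same all-sign-bits answer; for the logical shift the full-width
 * result is simply zero, hence the dup_imm of 0 above.
 */
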
DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    tcg_temp_free_ptr(qda);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

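/*
 * For example, VSHLL.S8 with shift 3 ("bottom" form): shift each 16-bit
 * lane left by 8 to discard the unwanted top byte, then arithmetic-shift
 * right by 8 - 3 = 5, leaving the input byte sign-extended and shifted
 * left by 3.
 */
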
DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is UNPREDICTABLE.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rm);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

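/*
 * For both the vector and scalar compare forms, a nonzero mask field
 * means the insn is a VPT rather than a plain VCMP: it additionally
 * sets the VPR mask fields via gen_vpst().
 */
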
static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_i32(rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}