1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * LSX translate functions
4 * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
5 */
7 #ifndef CONFIG_USER_ONLY
8 #define CHECK_SXE do { \
9 if ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0) { \
10 generate_exception(ctx, EXCCODE_SXD); \
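/*
 * CHECK_SXE refuses LSX instructions when the EUEN.SXE enable bit in the
 * TB flags is clear, raising the SX-disabled exception (EXCCODE_SXD); the
 * rest of the macro is elided here and presumably terminates translation.
 */

/*
 * gen_vvv, gen_vv and gen_vv_i are the generic out-of-line translators:
 * the vector register numbers (and the immediate, where present) are
 * passed as constant i32 operands to a helper that runs on cpu_env.
 */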
18 static bool gen_vvv(DisasContext *ctx, arg_vvv *a,
19 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
21 TCGv_i32 vd = tcg_constant_i32(a->vd);
22 TCGv_i32 vj = tcg_constant_i32(a->vj);
23 TCGv_i32 vk = tcg_constant_i32(a->vk);
27 func(cpu_env, vd, vj, vk);
31 static bool gen_vv(DisasContext *ctx, arg_vv *a,
32 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32))
34 TCGv_i32 vd = tcg_constant_i32(a->vd);
35 TCGv_i32 vj = tcg_constant_i32(a->vj);
38 func(cpu_env, vd, vj);
42 static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a,
43 void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
45 TCGv_i32 vd = tcg_constant_i32(a->vd);
46 TCGv_i32 vj = tcg_constant_i32(a->vj);
47 TCGv_i32 imm = tcg_constant_i32(a->imm);
50 func(cpu_env, vd, vj, imm);
54 static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
55 void (*func)(unsigned, uint32_t, uint32_t,
56 uint32_t, uint32_t, uint32_t))
58 uint32_t vd_ofs, vj_ofs, vk_ofs;
62 vd_ofs = vec_full_offset(a->vd);
63 vj_ofs = vec_full_offset(a->vj);
64 vk_ofs = vec_full_offset(a->vk);
66 func(mop, vd_ofs, vj_ofs, vk_ofs, 16, ctx->vl/8);
70 static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
71 void (*func)(unsigned, uint32_t, uint32_t,
74 uint32_t vd_ofs, vj_ofs;
78 vd_ofs = vec_full_offset(a->vd);
79 vj_ofs = vec_full_offset(a->vj);
81 func(mop, vd_ofs, vj_ofs, 16, ctx->vl/8);
85 static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
86 void (*func)(unsigned, uint32_t, uint32_t,
87 int64_t, uint32_t, uint32_t))
89 uint32_t vd_ofs, vj_ofs;
93 vd_ofs = vec_full_offset(a->vd);
94 vj_ofs = vec_full_offset(a->vj);
96 func(mop, vd_ofs, vj_ofs, a->imm, 16, ctx->vl/8);
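/*
 * There is no dedicated subtract-immediate expander used here; gvec_subi
 * implements vsubi.* by adding the negated immediate, which is equivalent
 * under modulo element-width arithmetic.
 */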
100 static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
102 uint32_t vd_ofs, vj_ofs;
106 vd_ofs = vec_full_offset(a->vd);
107 vj_ofs = vec_full_offset(a->vj);
109 tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, 16, ctx->vl/8);
113 TRANS(vadd_b, gvec_vvv, MO_8, tcg_gen_gvec_add)
114 TRANS(vadd_h, gvec_vvv, MO_16, tcg_gen_gvec_add)
115 TRANS(vadd_w, gvec_vvv, MO_32, tcg_gen_gvec_add)
116 TRANS(vadd_d, gvec_vvv, MO_64, tcg_gen_gvec_add)
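/*
 * vadd.q and vsub.q operate on the whole 128-bit register.  VADDSUB_Q
 * assembles them from the two 64-bit halves with the double-word
 * add2/sub2 ops, which propagate the carry/borrow from the low half
 * into the high half.
 */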
118 #define VADDSUB_Q(NAME) \
119 static bool trans_v## NAME ##_q(DisasContext *ctx, arg_vvv *a) \
121 TCGv_i64 rh, rl, ah, al, bh, bl; \
125 rh = tcg_temp_new_i64(); \
126 rl = tcg_temp_new_i64(); \
127 ah = tcg_temp_new_i64(); \
128 al = tcg_temp_new_i64(); \
129 bh = tcg_temp_new_i64(); \
130 bl = tcg_temp_new_i64(); \
132 get_vreg64(ah, a->vj, 1); \
133 get_vreg64(al, a->vj, 0); \
134 get_vreg64(bh, a->vk, 1); \
135 get_vreg64(bl, a->vk, 0); \
137 tcg_gen_## NAME ##2_i64(rl, rh, al, ah, bl, bh); \
139 set_vreg64(rh, a->vd, 1); \
140 set_vreg64(rl, a->vd, 0); \
148 TRANS(vsub_b, gvec_vvv, MO_8, tcg_gen_gvec_sub)
149 TRANS(vsub_h, gvec_vvv, MO_16, tcg_gen_gvec_sub)
150 TRANS(vsub_w, gvec_vvv, MO_32, tcg_gen_gvec_sub)
151 TRANS(vsub_d, gvec_vvv, MO_64, tcg_gen_gvec_sub)
153 TRANS(vaddi_bu, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
154 TRANS(vaddi_hu, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
155 TRANS(vaddi_wu, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
156 TRANS(vaddi_du, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
157 TRANS(vsubi_bu, gvec_subi, MO_8)
158 TRANS(vsubi_hu, gvec_subi, MO_16)
159 TRANS(vsubi_wu, gvec_subi, MO_32)
160 TRANS(vsubi_du, gvec_subi, MO_64)
162 TRANS(vneg_b, gvec_vv, MO_8, tcg_gen_gvec_neg)
163 TRANS(vneg_h, gvec_vv, MO_16, tcg_gen_gvec_neg)
164 TRANS(vneg_w, gvec_vv, MO_32, tcg_gen_gvec_neg)
165 TRANS(vneg_d, gvec_vv, MO_64, tcg_gen_gvec_neg)
167 TRANS(vsadd_b, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
168 TRANS(vsadd_h, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
169 TRANS(vsadd_w, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
170 TRANS(vsadd_d, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
171 TRANS(vsadd_bu, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
172 TRANS(vsadd_hu, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
173 TRANS(vsadd_wu, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
174 TRANS(vsadd_du, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
175 TRANS(vssub_b, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
176 TRANS(vssub_h, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
177 TRANS(vssub_w, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
178 TRANS(vssub_d, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
179 TRANS(vssub_bu, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
180 TRANS(vssub_hu, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
181 TRANS(vssub_wu, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
182 TRANS(vssub_du, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
184 TRANS(vhaddw_h_b, gen_vvv, gen_helper_vhaddw_h_b)
185 TRANS(vhaddw_w_h, gen_vvv, gen_helper_vhaddw_w_h)
186 TRANS(vhaddw_d_w, gen_vvv, gen_helper_vhaddw_d_w)
187 TRANS(vhaddw_q_d, gen_vvv, gen_helper_vhaddw_q_d)
188 TRANS(vhaddw_hu_bu, gen_vvv, gen_helper_vhaddw_hu_bu)
189 TRANS(vhaddw_wu_hu, gen_vvv, gen_helper_vhaddw_wu_hu)
190 TRANS(vhaddw_du_wu, gen_vvv, gen_helper_vhaddw_du_wu)
191 TRANS(vhaddw_qu_du, gen_vvv, gen_helper_vhaddw_qu_du)
192 TRANS(vhsubw_h_b, gen_vvv, gen_helper_vhsubw_h_b)
193 TRANS(vhsubw_w_h, gen_vvv, gen_helper_vhsubw_w_h)
194 TRANS(vhsubw_d_w, gen_vvv, gen_helper_vhsubw_d_w)
195 TRANS(vhsubw_q_d, gen_vvv, gen_helper_vhsubw_q_d)
196 TRANS(vhsubw_hu_bu, gen_vvv, gen_helper_vhsubw_hu_bu)
197 TRANS(vhsubw_wu_hu, gen_vvv, gen_helper_vhsubw_wu_hu)
198 TRANS(vhsubw_du_wu, gen_vvv, gen_helper_vhsubw_du_wu)
199 TRANS(vhsubw_qu_du, gen_vvv, gen_helper_vhsubw_qu_du)
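/*
 * Widening even/odd operations: the "ev" forms use the even-numbered
 * source elements and the "od" forms the odd-numbered ones, widening
 * each element to twice its size before the arithmetic.  Every expansion
 * below fills a GVecGen3 table indexed by vece with up to three
 * implementations: a .fniv vector path that does the extension with
 * shift/mask tricks, scalar .fni4/.fni8 paths for 32/64-bit results, and
 * a .fno out-of-line helper as the fallback; .opt_opc lists the vector
 * opcodes the host backend must provide for the .fniv path.
 */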
201 static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
205 int halfbits = 4 << vece;
207 t1 = tcg_temp_new_vec_matching(a);
208 t2 = tcg_temp_new_vec_matching(b);
210 /* Sign-extend the even elements from a */
211 tcg_gen_shli_vec(vece, t1, a, halfbits);
212 tcg_gen_sari_vec(vece, t1, t1, halfbits);
214 /* Sign-extend the even elements from b */
215 tcg_gen_shli_vec(vece, t2, b, halfbits);
216 tcg_gen_sari_vec(vece, t2, t2, halfbits);
218 tcg_gen_add_vec(vece, t, t1, t2);
221 static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
225 t1 = tcg_temp_new_i32();
226 t2 = tcg_temp_new_i32();
227 tcg_gen_ext16s_i32(t1, a);
228 tcg_gen_ext16s_i32(t2, b);
229 tcg_gen_add_i32(t, t1, t2);
232 static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
236 t1 = tcg_temp_new_i64();
237 t2 = tcg_temp_new_i64();
238 tcg_gen_ext32s_i64(t1, a);
239 tcg_gen_ext32s_i64(t2, b);
240 tcg_gen_add_i64(t, t1, t2);
243 static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
244 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
246 static const TCGOpcode vecop_list[] = {
247 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
249 static const GVecGen3 op[4] = {
251 .fniv = gen_vaddwev_s,
252 .fno = gen_helper_vaddwev_h_b,
253 .opt_opc = vecop_list,
257 .fni4 = gen_vaddwev_w_h,
258 .fniv = gen_vaddwev_s,
259 .fno = gen_helper_vaddwev_w_h,
260 .opt_opc = vecop_list,
264 .fni8 = gen_vaddwev_d_w,
265 .fniv = gen_vaddwev_s,
266 .fno = gen_helper_vaddwev_d_w,
267 .opt_opc = vecop_list,
271 .fno = gen_helper_vaddwev_q_d,
276 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
279 TRANS(vaddwev_h_b, gvec_vvv, MO_8, do_vaddwev_s)
280 TRANS(vaddwev_w_h, gvec_vvv, MO_16, do_vaddwev_s)
281 TRANS(vaddwev_d_w, gvec_vvv, MO_32, do_vaddwev_s)
282 TRANS(vaddwev_q_d, gvec_vvv, MO_64, do_vaddwev_s)
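/*
 * For the odd elements a single arithmetic shift right by half the
 * element width performs the sign extension, whereas the even elements
 * above need the shift-left/shift-right pair.
 */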
284 static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
288 t1 = tcg_temp_new_i32();
289 t2 = tcg_temp_new_i32();
290 tcg_gen_sari_i32(t1, a, 16);
291 tcg_gen_sari_i32(t2, b, 16);
292 tcg_gen_add_i32(t, t1, t2);
295 static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
299 t1 = tcg_temp_new_i64();
300 t2 = tcg_temp_new_i64();
301 tcg_gen_sari_i64(t1, a, 32);
302 tcg_gen_sari_i64(t2, b, 32);
303 tcg_gen_add_i64(t, t1, t2);
306 static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
310 int halfbits = 4 << vece;
312 t1 = tcg_temp_new_vec_matching(a);
313 t2 = tcg_temp_new_vec_matching(b);
315 /* Sign-extend the odd elements of each vector */
316 tcg_gen_sari_vec(vece, t1, a, halfbits);
317 tcg_gen_sari_vec(vece, t2, b, halfbits);
319 tcg_gen_add_vec(vece, t, t1, t2);
322 static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
323 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
325 static const TCGOpcode vecop_list[] = {
326 INDEX_op_sari_vec, INDEX_op_add_vec, 0
328 static const GVecGen3 op[4] = {
330 .fniv = gen_vaddwod_s,
331 .fno = gen_helper_vaddwod_h_b,
332 .opt_opc = vecop_list,
336 .fni4 = gen_vaddwod_w_h,
337 .fniv = gen_vaddwod_s,
338 .fno = gen_helper_vaddwod_w_h,
339 .opt_opc = vecop_list,
343 .fni8 = gen_vaddwod_d_w,
344 .fniv = gen_vaddwod_s,
345 .fno = gen_helper_vaddwod_d_w,
346 .opt_opc = vecop_list,
350 .fno = gen_helper_vaddwod_q_d,
355 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
358 TRANS(vaddwod_h_b, gvec_vvv, MO_8, do_vaddwod_s)
359 TRANS(vaddwod_w_h, gvec_vvv, MO_16, do_vaddwod_s)
360 TRANS(vaddwod_d_w, gvec_vvv, MO_32, do_vaddwod_s)
361 TRANS(vaddwod_q_d, gvec_vvv, MO_64, do_vaddwod_s)
363 static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
367 int halfbits = 4 << vece;
369 t1 = tcg_temp_new_vec_matching(a);
370 t2 = tcg_temp_new_vec_matching(b);
372 /* Sign-extend the even elements from a */
373 tcg_gen_shli_vec(vece, t1, a, halfbits);
374 tcg_gen_sari_vec(vece, t1, t1, halfbits);
376 /* Sign-extend the even elements from b */
377 tcg_gen_shli_vec(vece, t2, b, halfbits);
378 tcg_gen_sari_vec(vece, t2, t2, halfbits);
380 tcg_gen_sub_vec(vece, t, t1, t2);
383 static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
387 t1 = tcg_temp_new_i32();
388 t2 = tcg_temp_new_i32();
389 tcg_gen_ext16s_i32(t1, a);
390 tcg_gen_ext16s_i32(t2, b);
391 tcg_gen_sub_i32(t, t1, t2);
394 static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
398 t1 = tcg_temp_new_i64();
399 t2 = tcg_temp_new_i64();
400 tcg_gen_ext32s_i64(t1, a);
401 tcg_gen_ext32s_i64(t2, b);
402 tcg_gen_sub_i64(t, t1, t2);
405 static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
406 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
408 static const TCGOpcode vecop_list[] = {
409 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
411 static const GVecGen3 op[4] = {
413 .fniv = gen_vsubwev_s,
414 .fno = gen_helper_vsubwev_h_b,
415 .opt_opc = vecop_list,
419 .fni4 = gen_vsubwev_w_h,
420 .fniv = gen_vsubwev_s,
421 .fno = gen_helper_vsubwev_w_h,
422 .opt_opc = vecop_list,
426 .fni8 = gen_vsubwev_d_w,
427 .fniv = gen_vsubwev_s,
428 .fno = gen_helper_vsubwev_d_w,
429 .opt_opc = vecop_list,
433 .fno = gen_helper_vsubwev_q_d,
438 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
441 TRANS(vsubwev_h_b, gvec_vvv, MO_8, do_vsubwev_s)
442 TRANS(vsubwev_w_h, gvec_vvv, MO_16, do_vsubwev_s)
443 TRANS(vsubwev_d_w, gvec_vvv, MO_32, do_vsubwev_s)
444 TRANS(vsubwev_q_d, gvec_vvv, MO_64, do_vsubwev_s)
446 static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
450 int halfbits = 4 << vece;
452 t1 = tcg_temp_new_vec_matching(a);
453 t2 = tcg_temp_new_vec_matching(b);
455 /* Sign-extend the odd elements of each vector */
456 tcg_gen_sari_vec(vece, t1, a, halfbits);
457 tcg_gen_sari_vec(vece, t2, b, halfbits);
459 tcg_gen_sub_vec(vece, t, t1, t2);
462 static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
466 t1 = tcg_temp_new_i32();
467 t2 = tcg_temp_new_i32();
468 tcg_gen_sari_i32(t1, a, 16);
469 tcg_gen_sari_i32(t2, b, 16);
470 tcg_gen_sub_i32(t, t1, t2);
473 static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
477 t1 = tcg_temp_new_i64();
478 t2 = tcg_temp_new_i64();
479 tcg_gen_sari_i64(t1, a, 32);
480 tcg_gen_sari_i64(t2, b, 32);
481 tcg_gen_sub_i64(t, t1, t2);
484 static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
485 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
487 static const TCGOpcode vecop_list[] = {
488 INDEX_op_sari_vec, INDEX_op_sub_vec, 0
490 static const GVecGen3 op[4] = {
492 .fniv = gen_vsubwod_s,
493 .fno = gen_helper_vsubwod_h_b,
494 .opt_opc = vecop_list,
498 .fni4 = gen_vsubwod_w_h,
499 .fniv = gen_vsubwod_s,
500 .fno = gen_helper_vsubwod_w_h,
501 .opt_opc = vecop_list,
505 .fni8 = gen_vsubwod_d_w,
506 .fniv = gen_vsubwod_s,
507 .fno = gen_helper_vsubwod_d_w,
508 .opt_opc = vecop_list,
512 .fno = gen_helper_vsubwod_q_d,
517 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
520 TRANS(vsubwod_h_b, gvec_vvv, MO_8, do_vsubwod_s)
521 TRANS(vsubwod_w_h, gvec_vvv, MO_16, do_vsubwod_s)
522 TRANS(vsubwod_d_w, gvec_vvv, MO_32, do_vsubwod_s)
523 TRANS(vsubwod_q_d, gvec_vvv, MO_64, do_vsubwod_s)
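/*
 * Unsigned variants: even elements are zero-extended by masking with a
 * half-width all-ones constant, odd elements by a logical shift right of
 * half the element width.
 */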
525 static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
529 t1 = tcg_temp_new_vec_matching(a);
530 t2 = tcg_temp_new_vec_matching(b);
531 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
532 tcg_gen_and_vec(vece, t1, a, t3);
533 tcg_gen_and_vec(vece, t2, b, t3);
534 tcg_gen_add_vec(vece, t, t1, t2);
537 static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
541 t1 = tcg_temp_new_i32();
542 t2 = tcg_temp_new_i32();
543 tcg_gen_ext16u_i32(t1, a);
544 tcg_gen_ext16u_i32(t2, b);
545 tcg_gen_add_i32(t, t1, t2);
548 static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
552 t1 = tcg_temp_new_i64();
553 t2 = tcg_temp_new_i64();
554 tcg_gen_ext32u_i64(t1, a);
555 tcg_gen_ext32u_i64(t2, b);
556 tcg_gen_add_i64(t, t1, t2);
559 static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
560 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
562 static const TCGOpcode vecop_list[] = {
565 static const GVecGen3 op[4] = {
567 .fniv = gen_vaddwev_u,
568 .fno = gen_helper_vaddwev_h_bu,
569 .opt_opc = vecop_list,
573 .fni4 = gen_vaddwev_w_hu,
574 .fniv = gen_vaddwev_u,
575 .fno = gen_helper_vaddwev_w_hu,
576 .opt_opc = vecop_list,
580 .fni8 = gen_vaddwev_d_wu,
581 .fniv = gen_vaddwev_u,
582 .fno = gen_helper_vaddwev_d_wu,
583 .opt_opc = vecop_list,
587 .fno = gen_helper_vaddwev_q_du,
592 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
595 TRANS(vaddwev_h_bu, gvec_vvv, MO_8, do_vaddwev_u)
596 TRANS(vaddwev_w_hu, gvec_vvv, MO_16, do_vaddwev_u)
597 TRANS(vaddwev_d_wu, gvec_vvv, MO_32, do_vaddwev_u)
598 TRANS(vaddwev_q_du, gvec_vvv, MO_64, do_vaddwev_u)
600 static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
604 int halfbits = 4 << vece;
606 t1 = tcg_temp_new_vec_matching(a);
607 t2 = tcg_temp_new_vec_matching(b);
609 /* Zero-extend the odd elements of each vector */
610 tcg_gen_shri_vec(vece, t1, a, halfbits);
611 tcg_gen_shri_vec(vece, t2, b, halfbits);
613 tcg_gen_add_vec(vece, t, t1, t2);
616 static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
620 t1 = tcg_temp_new_i32();
621 t2 = tcg_temp_new_i32();
622 tcg_gen_shri_i32(t1, a, 16);
623 tcg_gen_shri_i32(t2, b, 16);
624 tcg_gen_add_i32(t, t1, t2);
627 static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
631 t1 = tcg_temp_new_i64();
632 t2 = tcg_temp_new_i64();
633 tcg_gen_shri_i64(t1, a, 32);
634 tcg_gen_shri_i64(t2, b, 32);
635 tcg_gen_add_i64(t, t1, t2);
638 static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
639 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
641 static const TCGOpcode vecop_list[] = {
642 INDEX_op_shri_vec, INDEX_op_add_vec, 0
644 static const GVecGen3 op[4] = {
646 .fniv = gen_vaddwod_u,
647 .fno = gen_helper_vaddwod_h_bu,
648 .opt_opc = vecop_list,
652 .fni4 = gen_vaddwod_w_hu,
653 .fniv = gen_vaddwod_u,
654 .fno = gen_helper_vaddwod_w_hu,
655 .opt_opc = vecop_list,
659 .fni8 = gen_vaddwod_d_wu,
660 .fniv = gen_vaddwod_u,
661 .fno = gen_helper_vaddwod_d_wu,
662 .opt_opc = vecop_list,
666 .fno = gen_helper_vaddwod_q_du,
671 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
674 TRANS(vaddwod_h_bu, gvec_vvv, MO_8, do_vaddwod_u)
675 TRANS(vaddwod_w_hu, gvec_vvv, MO_16, do_vaddwod_u)
676 TRANS(vaddwod_d_wu, gvec_vvv, MO_32, do_vaddwod_u)
677 TRANS(vaddwod_q_du, gvec_vvv, MO_64, do_vaddwod_u)
679 static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
683 t1 = tcg_temp_new_vec_matching(a);
684 t2 = tcg_temp_new_vec_matching(b);
685 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
686 tcg_gen_and_vec(vece, t1, a, t3);
687 tcg_gen_and_vec(vece, t2, b, t3);
688 tcg_gen_sub_vec(vece, t, t1, t2);
691 static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
695 t1 = tcg_temp_new_i32();
696 t2 = tcg_temp_new_i32();
697 tcg_gen_ext16u_i32(t1, a);
698 tcg_gen_ext16u_i32(t2, b);
699 tcg_gen_sub_i32(t, t1, t2);
702 static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
706 t1 = tcg_temp_new_i64();
707 t2 = tcg_temp_new_i64();
708 tcg_gen_ext32u_i64(t1, a);
709 tcg_gen_ext32u_i64(t2, b);
710 tcg_gen_sub_i64(t, t1, t2);
713 static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
714 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
716 static const TCGOpcode vecop_list[] = {
719 static const GVecGen3 op[4] = {
721 .fniv = gen_vsubwev_u,
722 .fno = gen_helper_vsubwev_h_bu,
723 .opt_opc = vecop_list,
727 .fni4 = gen_vsubwev_w_hu,
728 .fniv = gen_vsubwev_u,
729 .fno = gen_helper_vsubwev_w_hu,
730 .opt_opc = vecop_list,
734 .fni8 = gen_vsubwev_d_wu,
735 .fniv = gen_vsubwev_u,
736 .fno = gen_helper_vsubwev_d_wu,
737 .opt_opc = vecop_list,
741 .fno = gen_helper_vsubwev_q_du,
746 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
749 TRANS(vsubwev_h_bu, gvec_vvv, MO_8, do_vsubwev_u)
750 TRANS(vsubwev_w_hu, gvec_vvv, MO_16, do_vsubwev_u)
751 TRANS(vsubwev_d_wu, gvec_vvv, MO_32, do_vsubwev_u)
752 TRANS(vsubwev_q_du, gvec_vvv, MO_64, do_vsubwev_u)
754 static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
758 int halfbits = 4 << vece;
760 t1 = tcg_temp_new_vec_matching(a);
761 t2 = tcg_temp_new_vec_matching(b);
763 /* Zero-extend the odd elements of each vector */
764 tcg_gen_shri_vec(vece, t1, a, halfbits);
765 tcg_gen_shri_vec(vece, t2, b, halfbits);
767 tcg_gen_sub_vec(vece, t, t1, t2);
770 static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
774 t1 = tcg_temp_new_i32();
775 t2 = tcg_temp_new_i32();
776 tcg_gen_shri_i32(t1, a, 16);
777 tcg_gen_shri_i32(t2, b, 16);
778 tcg_gen_sub_i32(t, t1, t2);
781 static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
785 t1 = tcg_temp_new_i64();
786 t2 = tcg_temp_new_i64();
787 tcg_gen_shri_i64(t1, a, 32);
788 tcg_gen_shri_i64(t2, b, 32);
789 tcg_gen_sub_i64(t, t1, t2);
792 static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
793 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
795 static const TCGOpcode vecop_list[] = {
796 INDEX_op_shri_vec, INDEX_op_sub_vec, 0
798 static const GVecGen3 op[4] = {
800 .fniv = gen_vsubwod_u,
801 .fno = gen_helper_vsubwod_h_bu,
802 .opt_opc = vecop_list,
806 .fni4 = gen_vsubwod_w_hu,
807 .fniv = gen_vsubwod_u,
808 .fno = gen_helper_vsubwod_w_hu,
809 .opt_opc = vecop_list,
813 .fni8 = gen_vsubwod_d_wu,
814 .fniv = gen_vsubwod_u,
815 .fno = gen_helper_vsubwod_d_wu,
816 .opt_opc = vecop_list,
820 .fno = gen_helper_vsubwod_q_du,
825 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
828 TRANS(vsubwod_h_bu, gvec_vvv, MO_8, do_vsubwod_u)
829 TRANS(vsubwod_w_hu, gvec_vvv, MO_16, do_vsubwod_u)
830 TRANS(vsubwod_d_wu, gvec_vvv, MO_32, do_vsubwod_u)
831 TRANS(vsubwod_q_du, gvec_vvv, MO_64, do_vsubwod_u)
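/*
 * Mixed-signedness forms ("u_s", e.g. vaddwev.h.bu.b): the element taken
 * from vj is zero-extended and the element taken from vk is sign-extended
 * before the addition.
 */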
833 static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
837 int halfbits = 4 << vece;
839 t1 = tcg_temp_new_vec_matching(a);
840 t2 = tcg_temp_new_vec_matching(b);
841 t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
843 /* Zero-extend the even elements from a */
844 tcg_gen_and_vec(vece, t1, a, t3);
846 /* Sign-extend the even elements from b */
847 tcg_gen_shli_vec(vece, t2, b, halfbits);
848 tcg_gen_sari_vec(vece, t2, t2, halfbits);
850 tcg_gen_add_vec(vece, t, t1, t2);
853 static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
857 t1 = tcg_temp_new_i32();
858 t2 = tcg_temp_new_i32();
859 tcg_gen_ext16u_i32(t1, a);
860 tcg_gen_ext16s_i32(t2, b);
861 tcg_gen_add_i32(t, t1, t2);
864 static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
868 t1 = tcg_temp_new_i64();
869 t2 = tcg_temp_new_i64();
870 tcg_gen_ext32u_i64(t1, a);
871 tcg_gen_ext32s_i64(t2, b);
872 tcg_gen_add_i64(t, t1, t2);
875 static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
876 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
878 static const TCGOpcode vecop_list[] = {
879 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
881 static const GVecGen3 op[4] = {
883 .fniv = gen_vaddwev_u_s,
884 .fno = gen_helper_vaddwev_h_bu_b,
885 .opt_opc = vecop_list,
889 .fni4 = gen_vaddwev_w_hu_h,
890 .fniv = gen_vaddwev_u_s,
891 .fno = gen_helper_vaddwev_w_hu_h,
892 .opt_opc = vecop_list,
896 .fni8 = gen_vaddwev_d_wu_w,
897 .fniv = gen_vaddwev_u_s,
898 .fno = gen_helper_vaddwev_d_wu_w,
899 .opt_opc = vecop_list,
903 .fno = gen_helper_vaddwev_q_du_d,
908 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
911 TRANS(vaddwev_h_bu_b, gvec_vvv, MO_8, do_vaddwev_u_s)
912 TRANS(vaddwev_w_hu_h, gvec_vvv, MO_16, do_vaddwev_u_s)
913 TRANS(vaddwev_d_wu_w, gvec_vvv, MO_32, do_vaddwev_u_s)
914 TRANS(vaddwev_q_du_d, gvec_vvv, MO_64, do_vaddwev_u_s)
916 static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
920 int halfbits = 4 << vece;
922 t1 = tcg_temp_new_vec_matching(a);
923 t2 = tcg_temp_new_vec_matching(b);
925 /* Zero-extend the odd elements from a */
926 tcg_gen_shri_vec(vece, t1, a, halfbits);
927 /* Sign-extend the odd elements from b */
928 tcg_gen_sari_vec(vece, t2, b, halfbits);
930 tcg_gen_add_vec(vece, t, t1, t2);
933 static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
937 t1 = tcg_temp_new_i32();
938 t2 = tcg_temp_new_i32();
939 tcg_gen_shri_i32(t1, a, 16);
940 tcg_gen_sari_i32(t2, b, 16);
941 tcg_gen_add_i32(t, t1, t2);
944 static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
948 t1 = tcg_temp_new_i64();
949 t2 = tcg_temp_new_i64();
950 tcg_gen_shri_i64(t1, a, 32);
951 tcg_gen_sari_i64(t2, b, 32);
952 tcg_gen_add_i64(t, t1, t2);
955 static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
956 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
958 static const TCGOpcode vecop_list[] = {
959 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
961 static const GVecGen3 op[4] = {
963 .fniv = gen_vaddwod_u_s,
964 .fno = gen_helper_vaddwod_h_bu_b,
965 .opt_opc = vecop_list,
969 .fni4 = gen_vaddwod_w_hu_h,
970 .fniv = gen_vaddwod_u_s,
971 .fno = gen_helper_vaddwod_w_hu_h,
972 .opt_opc = vecop_list,
976 .fni8 = gen_vaddwod_d_wu_w,
977 .fniv = gen_vaddwod_u_s,
978 .fno = gen_helper_vaddwod_d_wu_w,
979 .opt_opc = vecop_list,
983 .fno = gen_helper_vaddwod_q_du_d,
988 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
991 TRANS(vaddwod_h_bu_b, gvec_vvv, MO_8, do_vaddwod_u_s)
992 TRANS(vaddwod_w_hu_h, gvec_vvv, MO_16, do_vaddwod_u_s)
993 TRANS(vaddwod_d_wu_w, gvec_vvv, MO_32, do_vaddwod_u_s)
994 TRANS(vaddwod_q_du_d, gvec_vvv, MO_64, do_vaddwod_u_s)
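/*
 * Averages are computed without widening:
 *   vavg:  (a >> 1) + (b >> 1) + ((a & b) & 1)   -- truncating average
 *   vavgr: (a >> 1) + (b >> 1) + ((a | b) & 1)   -- rounding average
 * with an arithmetic or logical shift selecting the signed or unsigned
 * flavour; do_vavg takes the shift and the and/or operation as callbacks.
 */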
996 static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
997 void (*gen_shr_vec)(unsigned, TCGv_vec,
999 void (*gen_round_vec)(unsigned, TCGv_vec,
1000 TCGv_vec, TCGv_vec))
1002 TCGv_vec tmp = tcg_temp_new_vec_matching(t);
1003 gen_round_vec(vece, tmp, a, b);
1004 tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
1005 gen_shr_vec(vece, a, a, 1);
1006 gen_shr_vec(vece, b, b, 1);
1007 tcg_gen_add_vec(vece, t, a, b);
1008 tcg_gen_add_vec(vece, t, t, tmp);
1011 static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1013 do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
1016 static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1018 do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
1021 static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1023 do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
1026 static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1028 do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
1031 static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1032 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1034 static const TCGOpcode vecop_list[] = {
1035 INDEX_op_sari_vec, INDEX_op_add_vec, 0
1037 static const GVecGen3 op[4] = {
1040 .fno = gen_helper_vavg_b,
1041 .opt_opc = vecop_list,
1046 .fno = gen_helper_vavg_h,
1047 .opt_opc = vecop_list,
1052 .fno = gen_helper_vavg_w,
1053 .opt_opc = vecop_list,
1058 .fno = gen_helper_vavg_d,
1059 .opt_opc = vecop_list,
1064 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1067 static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1068 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1070 static const TCGOpcode vecop_list[] = {
1071 INDEX_op_shri_vec, INDEX_op_add_vec, 0
1073 static const GVecGen3 op[4] = {
1076 .fno = gen_helper_vavg_bu,
1077 .opt_opc = vecop_list,
1082 .fno = gen_helper_vavg_hu,
1083 .opt_opc = vecop_list,
1088 .fno = gen_helper_vavg_wu,
1089 .opt_opc = vecop_list,
1094 .fno = gen_helper_vavg_du,
1095 .opt_opc = vecop_list,
1100 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1103 TRANS(vavg_b, gvec_vvv, MO_8, do_vavg_s)
1104 TRANS(vavg_h, gvec_vvv, MO_16, do_vavg_s)
1105 TRANS(vavg_w, gvec_vvv, MO_32, do_vavg_s)
1106 TRANS(vavg_d, gvec_vvv, MO_64, do_vavg_s)
1107 TRANS(vavg_bu, gvec_vvv, MO_8, do_vavg_u)
1108 TRANS(vavg_hu, gvec_vvv, MO_16, do_vavg_u)
1109 TRANS(vavg_wu, gvec_vvv, MO_32, do_vavg_u)
1110 TRANS(vavg_du, gvec_vvv, MO_64, do_vavg_u)
1112 static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1113 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1115 static const TCGOpcode vecop_list[] = {
1116 INDEX_op_sari_vec, INDEX_op_add_vec, 0
1118 static const GVecGen3 op[4] = {
1120 .fniv = gen_vavgr_s,
1121 .fno = gen_helper_vavgr_b,
1122 .opt_opc = vecop_list,
1126 .fniv = gen_vavgr_s,
1127 .fno = gen_helper_vavgr_h,
1128 .opt_opc = vecop_list,
1132 .fniv = gen_vavgr_s,
1133 .fno = gen_helper_vavgr_w,
1134 .opt_opc = vecop_list,
1138 .fniv = gen_vavgr_s,
1139 .fno = gen_helper_vavgr_d,
1140 .opt_opc = vecop_list,
1145 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1148 static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1149 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1151 static const TCGOpcode vecop_list[] = {
1152 INDEX_op_shri_vec, INDEX_op_add_vec, 0
1154 static const GVecGen3 op[4] = {
1156 .fniv = gen_vavgr_u,
1157 .fno = gen_helper_vavgr_bu,
1158 .opt_opc = vecop_list,
1162 .fniv = gen_vavgr_u,
1163 .fno = gen_helper_vavgr_hu,
1164 .opt_opc = vecop_list,
1168 .fniv = gen_vavgr_u,
1169 .fno = gen_helper_vavgr_wu,
1170 .opt_opc = vecop_list,
1174 .fniv = gen_vavgr_u,
1175 .fno = gen_helper_vavgr_du,
1176 .opt_opc = vecop_list,
1181 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1184 TRANS(vavgr_b, gvec_vvv, MO_8, do_vavgr_s)
1185 TRANS(vavgr_h, gvec_vvv, MO_16, do_vavgr_s)
1186 TRANS(vavgr_w, gvec_vvv, MO_32, do_vavgr_s)
1187 TRANS(vavgr_d, gvec_vvv, MO_64, do_vavgr_s)
1188 TRANS(vavgr_bu, gvec_vvv, MO_8, do_vavgr_u)
1189 TRANS(vavgr_hu, gvec_vvv, MO_16, do_vavgr_u)
1190 TRANS(vavgr_wu, gvec_vvv, MO_32, do_vavgr_u)
1191 TRANS(vavgr_du, gvec_vvv, MO_64, do_vavgr_u)
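/*
 * Absolute difference: |a - b| is computed as max(a, b) - min(a, b),
 * using the signed or unsigned min/max as appropriate.
 */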
1193 static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1195 tcg_gen_smax_vec(vece, t, a, b);
1196 tcg_gen_smin_vec(vece, a, a, b);
1197 tcg_gen_sub_vec(vece, t, t, a);
1200 static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1201 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1203 static const TCGOpcode vecop_list[] = {
1204 INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
1206 static const GVecGen3 op[4] = {
1208 .fniv = gen_vabsd_s,
1209 .fno = gen_helper_vabsd_b,
1210 .opt_opc = vecop_list,
1214 .fniv = gen_vabsd_s,
1215 .fno = gen_helper_vabsd_h,
1216 .opt_opc = vecop_list,
1220 .fniv = gen_vabsd_s,
1221 .fno = gen_helper_vabsd_w,
1222 .opt_opc = vecop_list,
1226 .fniv = gen_vabsd_s,
1227 .fno = gen_helper_vabsd_d,
1228 .opt_opc = vecop_list,
1233 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1236 static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1238 tcg_gen_umax_vec(vece, t, a, b);
1239 tcg_gen_umin_vec(vece, a, a, b);
1240 tcg_gen_sub_vec(vece, t, t, a);
1243 static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1244 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1246 static const TCGOpcode vecop_list[] = {
1247 INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
1249 static const GVecGen3 op[4] = {
1251 .fniv = gen_vabsd_u,
1252 .fno = gen_helper_vabsd_bu,
1253 .opt_opc = vecop_list,
1257 .fniv = gen_vabsd_u,
1258 .fno = gen_helper_vabsd_hu,
1259 .opt_opc = vecop_list,
1263 .fniv = gen_vabsd_u,
1264 .fno = gen_helper_vabsd_wu,
1265 .opt_opc = vecop_list,
1269 .fniv = gen_vabsd_u,
1270 .fno = gen_helper_vabsd_du,
1271 .opt_opc = vecop_list,
1276 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1279 TRANS(vabsd_b, gvec_vvv, MO_8, do_vabsd_s)
1280 TRANS(vabsd_h, gvec_vvv, MO_16, do_vabsd_s)
1281 TRANS(vabsd_w, gvec_vvv, MO_32, do_vabsd_s)
1282 TRANS(vabsd_d, gvec_vvv, MO_64, do_vabsd_s)
1283 TRANS(vabsd_bu, gvec_vvv, MO_8, do_vabsd_u)
1284 TRANS(vabsd_hu, gvec_vvv, MO_16, do_vabsd_u)
1285 TRANS(vabsd_wu, gvec_vvv, MO_32, do_vabsd_u)
1286 TRANS(vabsd_du, gvec_vvv, MO_64, do_vabsd_u)
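/*
 * vadda adds the absolute values of the two sources, |a| + |b|, using
 * the vector abs op.
 */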
1288 static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1292 t1 = tcg_temp_new_vec_matching(a);
1293 t2 = tcg_temp_new_vec_matching(b);
1295 tcg_gen_abs_vec(vece, t1, a);
1296 tcg_gen_abs_vec(vece, t2, b);
1297 tcg_gen_add_vec(vece, t, t1, t2);
1300 static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1301 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1303 static const TCGOpcode vecop_list[] = {
1304 INDEX_op_abs_vec, INDEX_op_add_vec, 0
1306 static const GVecGen3 op[4] = {
1309 .fno = gen_helper_vadda_b,
1310 .opt_opc = vecop_list,
1315 .fno = gen_helper_vadda_h,
1316 .opt_opc = vecop_list,
1321 .fno = gen_helper_vadda_w,
1322 .opt_opc = vecop_list,
1327 .fno = gen_helper_vadda_d,
1328 .opt_opc = vecop_list,
1333 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1336 TRANS(vadda_b, gvec_vvv, MO_8, do_vadda)
1337 TRANS(vadda_h, gvec_vvv, MO_16, do_vadda)
1338 TRANS(vadda_w, gvec_vvv, MO_32, do_vadda)
1339 TRANS(vadda_d, gvec_vvv, MO_64, do_vadda)
1341 TRANS(vmax_b, gvec_vvv, MO_8, tcg_gen_gvec_smax)
1342 TRANS(vmax_h, gvec_vvv, MO_16, tcg_gen_gvec_smax)
1343 TRANS(vmax_w, gvec_vvv, MO_32, tcg_gen_gvec_smax)
1344 TRANS(vmax_d, gvec_vvv, MO_64, tcg_gen_gvec_smax)
1345 TRANS(vmax_bu, gvec_vvv, MO_8, tcg_gen_gvec_umax)
1346 TRANS(vmax_hu, gvec_vvv, MO_16, tcg_gen_gvec_umax)
1347 TRANS(vmax_wu, gvec_vvv, MO_32, tcg_gen_gvec_umax)
1348 TRANS(vmax_du, gvec_vvv, MO_64, tcg_gen_gvec_umax)
1350 TRANS(vmin_b, gvec_vvv, MO_8, tcg_gen_gvec_smin)
1351 TRANS(vmin_h, gvec_vvv, MO_16, tcg_gen_gvec_smin)
1352 TRANS(vmin_w, gvec_vvv, MO_32, tcg_gen_gvec_smin)
1353 TRANS(vmin_d, gvec_vvv, MO_64, tcg_gen_gvec_smin)
1354 TRANS(vmin_bu, gvec_vvv, MO_8, tcg_gen_gvec_umin)
1355 TRANS(vmin_hu, gvec_vvv, MO_16, tcg_gen_gvec_umin)
1356 TRANS(vmin_wu, gvec_vvv, MO_32, tcg_gen_gvec_umin)
1357 TRANS(vmin_du, gvec_vvv, MO_64, tcg_gen_gvec_umin)
1359 static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1361 tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1364 static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1366 tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1369 static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1371 tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1374 static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
1376 tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
1379 static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1380 int64_t imm, uint32_t oprsz, uint32_t maxsz)
1382 static const TCGOpcode vecop_list[] = {
1383 INDEX_op_smin_vec, 0
1385 static const GVecGen2i op[4] = {
1387 .fniv = gen_vmini_s,
1388 .fnoi = gen_helper_vmini_b,
1389 .opt_opc = vecop_list,
1393 .fniv = gen_vmini_s,
1394 .fnoi = gen_helper_vmini_h,
1395 .opt_opc = vecop_list,
1399 .fniv = gen_vmini_s,
1400 .fnoi = gen_helper_vmini_w,
1401 .opt_opc = vecop_list,
1405 .fniv = gen_vmini_s,
1406 .fnoi = gen_helper_vmini_d,
1407 .opt_opc = vecop_list,
1412 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1415 static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1416 int64_t imm, uint32_t oprsz, uint32_t maxsz)
1418 static const TCGOpcode vecop_list[] = {
1419 INDEX_op_umin_vec, 0
1421 static const GVecGen2i op[4] = {
1423 .fniv = gen_vmini_u,
1424 .fnoi = gen_helper_vmini_bu,
1425 .opt_opc = vecop_list,
1429 .fniv = gen_vmini_u,
1430 .fnoi = gen_helper_vmini_hu,
1431 .opt_opc = vecop_list,
1435 .fniv = gen_vmini_u,
1436 .fnoi = gen_helper_vmini_wu,
1437 .opt_opc = vecop_list,
1441 .fniv = gen_vmini_u,
1442 .fnoi = gen_helper_vmini_du,
1443 .opt_opc = vecop_list,
1448 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1451 TRANS(vmini_b, gvec_vv_i, MO_8, do_vmini_s)
1452 TRANS(vmini_h, gvec_vv_i, MO_16, do_vmini_s)
1453 TRANS(vmini_w, gvec_vv_i, MO_32, do_vmini_s)
1454 TRANS(vmini_d, gvec_vv_i, MO_64, do_vmini_s)
1455 TRANS(vmini_bu, gvec_vv_i, MO_8, do_vmini_u)
1456 TRANS(vmini_hu, gvec_vv_i, MO_16, do_vmini_u)
1457 TRANS(vmini_wu, gvec_vv_i, MO_32, do_vmini_u)
1458 TRANS(vmini_du, gvec_vv_i, MO_64, do_vmini_u)
1460 static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1461 int64_t imm, uint32_t oprsz, uint32_t maxsz)
1463 static const TCGOpcode vecop_list[] = {
1464 INDEX_op_smax_vec, 0
1466 static const GVecGen2i op[4] = {
1468 .fniv = gen_vmaxi_s,
1469 .fnoi = gen_helper_vmaxi_b,
1470 .opt_opc = vecop_list,
1474 .fniv = gen_vmaxi_s,
1475 .fnoi = gen_helper_vmaxi_h,
1476 .opt_opc = vecop_list,
1480 .fniv = gen_vmaxi_s,
1481 .fnoi = gen_helper_vmaxi_w,
1482 .opt_opc = vecop_list,
1486 .fniv = gen_vmaxi_s,
1487 .fnoi = gen_helper_vmaxi_d,
1488 .opt_opc = vecop_list,
1493 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1496 static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1497 int64_t imm, uint32_t oprsz, uint32_t maxsz)
1499 static const TCGOpcode vecop_list[] = {
1500 INDEX_op_umax_vec, 0
1502 static const GVecGen2i op[4] = {
1504 .fniv = gen_vmaxi_u,
1505 .fnoi = gen_helper_vmaxi_bu,
1506 .opt_opc = vecop_list,
1510 .fniv = gen_vmaxi_u,
1511 .fnoi = gen_helper_vmaxi_hu,
1512 .opt_opc = vecop_list,
1516 .fniv = gen_vmaxi_u,
1517 .fnoi = gen_helper_vmaxi_wu,
1518 .opt_opc = vecop_list,
1522 .fniv = gen_vmaxi_u,
1523 .fnoi = gen_helper_vmaxi_du,
1524 .opt_opc = vecop_list,
1529 tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
1532 TRANS(vmaxi_b, gvec_vv_i, MO_8, do_vmaxi_s)
1533 TRANS(vmaxi_h, gvec_vv_i, MO_16, do_vmaxi_s)
1534 TRANS(vmaxi_w, gvec_vv_i, MO_32, do_vmaxi_s)
1535 TRANS(vmaxi_d, gvec_vv_i, MO_64, do_vmaxi_s)
1536 TRANS(vmaxi_bu, gvec_vv_i, MO_8, do_vmaxi_u)
1537 TRANS(vmaxi_hu, gvec_vv_i, MO_16, do_vmaxi_u)
1538 TRANS(vmaxi_wu, gvec_vv_i, MO_32, do_vmaxi_u)
1539 TRANS(vmaxi_du, gvec_vv_i, MO_64, do_vmaxi_u)
1541 TRANS(vmul_b, gvec_vvv, MO_8, tcg_gen_gvec_mul)
1542 TRANS(vmul_h, gvec_vvv, MO_16, tcg_gen_gvec_mul)
1543 TRANS(vmul_w, gvec_vvv, MO_32, tcg_gen_gvec_mul)
1544 TRANS(vmul_d, gvec_vvv, MO_64, tcg_gen_gvec_mul)
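/*
 * vmuh returns the high half of the product: gen_vmuh_w/gen_vmuh_d (and
 * their unsigned counterparts) use muls2/mulu2 and discard the low half,
 * while the byte and halfword cases are left to the out-of-line helpers.
 */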
1546 static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1548 TCGv_i32 discard = tcg_temp_new_i32();
1549 tcg_gen_muls2_i32(discard, t, a, b);
1552 static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1554 TCGv_i64 discard = tcg_temp_new_i64();
1555 tcg_gen_muls2_i64(discard, t, a, b);
1558 static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1559 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1561 static const GVecGen3 op[4] = {
1563 .fno = gen_helper_vmuh_b,
1567 .fno = gen_helper_vmuh_h,
1572 .fno = gen_helper_vmuh_w,
1577 .fno = gen_helper_vmuh_d,
1582 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1585 TRANS(vmuh_b, gvec_vvv, MO_8, do_vmuh_s)
1586 TRANS(vmuh_h, gvec_vvv, MO_16, do_vmuh_s)
1587 TRANS(vmuh_w, gvec_vvv, MO_32, do_vmuh_s)
1588 TRANS(vmuh_d, gvec_vvv, MO_64, do_vmuh_s)
1590 static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1592 TCGv_i32 discard = tcg_temp_new_i32();
1593 tcg_gen_mulu2_i32(discard, t, a, b);
1596 static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1598 TCGv_i64 discard = tcg_temp_new_i64();
1599 tcg_gen_mulu2_i64(discard, t, a, b);
1602 static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1603 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1605 static const GVecGen3 op[4] = {
1607 .fno = gen_helper_vmuh_bu,
1611 .fno = gen_helper_vmuh_hu,
1615 .fni4 = gen_vmuh_wu,
1616 .fno = gen_helper_vmuh_wu,
1620 .fni8 = gen_vmuh_du,
1621 .fno = gen_helper_vmuh_du,
1626 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1629 TRANS(vmuh_bu, gvec_vvv, MO_8, do_vmuh_u)
1630 TRANS(vmuh_hu, gvec_vvv, MO_16, do_vmuh_u)
1631 TRANS(vmuh_wu, gvec_vvv, MO_32, do_vmuh_u)
1632 TRANS(vmuh_du, gvec_vvv, MO_64, do_vmuh_u)
1634 static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1637 int halfbits = 4 << vece;
1639 t1 = tcg_temp_new_vec_matching(a);
1640 t2 = tcg_temp_new_vec_matching(b);
1641 tcg_gen_shli_vec(vece, t1, a, halfbits);
1642 tcg_gen_sari_vec(vece, t1, t1, halfbits);
1643 tcg_gen_shli_vec(vece, t2, b, halfbits);
1644 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1645 tcg_gen_mul_vec(vece, t, t1, t2);
1648 static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1652 t1 = tcg_temp_new_i32();
1653 t2 = tcg_temp_new_i32();
1654 tcg_gen_ext16s_i32(t1, a);
1655 tcg_gen_ext16s_i32(t2, b);
1656 tcg_gen_mul_i32(t, t1, t2);
1659 static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1663 t1 = tcg_temp_new_i64();
1664 t2 = tcg_temp_new_i64();
1665 tcg_gen_ext32s_i64(t1, a);
1666 tcg_gen_ext32s_i64(t2, b);
1667 tcg_gen_mul_i64(t, t1, t2);
1670 static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1671 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1673 static const TCGOpcode vecop_list[] = {
1674 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
1676 static const GVecGen3 op[3] = {
1678 .fniv = gen_vmulwev_s,
1679 .fno = gen_helper_vmulwev_h_b,
1680 .opt_opc = vecop_list,
1684 .fni4 = gen_vmulwev_w_h,
1685 .fniv = gen_vmulwev_s,
1686 .fno = gen_helper_vmulwev_w_h,
1687 .opt_opc = vecop_list,
1691 .fni8 = gen_vmulwev_d_w,
1692 .fniv = gen_vmulwev_s,
1693 .fno = gen_helper_vmulwev_d_w,
1694 .opt_opc = vecop_list,
1699 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1702 TRANS(vmulwev_h_b, gvec_vvv, MO_8, do_vmulwev_s)
1703 TRANS(vmulwev_w_h, gvec_vvv, MO_16, do_vmulwev_s)
1704 TRANS(vmulwev_d_w, gvec_vvv, MO_32, do_vmulwev_s)
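/*
 * The 128-bit results of vmulw{ev,od}.q.* come straight from a
 * 64x64->128 multiply of the selected doubleword lane (index 0 for the
 * even form, 1 for the odd form).  tcg_gen_mulus2_i64 (unsigned x signed)
 * is obtained from mulsu2 by swapping the operands.
 */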
1706 static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
1707 TCGv_i64 arg1, TCGv_i64 arg2)
1709 tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
1712 #define VMUL_Q(NAME, FN, idx1, idx2) \
1713 static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
1715 TCGv_i64 rh, rl, arg1, arg2; \
1717 rh = tcg_temp_new_i64(); \
1718 rl = tcg_temp_new_i64(); \
1719 arg1 = tcg_temp_new_i64(); \
1720 arg2 = tcg_temp_new_i64(); \
1722 get_vreg64(arg1, a->vj, idx1); \
1723 get_vreg64(arg2, a->vk, idx2); \
1725 tcg_gen_## FN ##_i64(rl, rh, arg1, arg2); \
1727 set_vreg64(rh, a->vd, 1); \
1728 set_vreg64(rl, a->vd, 0); \
1733 VMUL_Q(vmulwev_q_d, muls2, 0, 0)
1734 VMUL_Q(vmulwod_q_d, muls2, 1, 1)
1735 VMUL_Q(vmulwev_q_du, mulu2, 0, 0)
1736 VMUL_Q(vmulwod_q_du, mulu2, 1, 1)
1737 VMUL_Q(vmulwev_q_du_d, mulus2, 0, 0)
1738 VMUL_Q(vmulwod_q_du_d, mulus2, 1, 1)
1740 static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1743 int halfbits = 4 << vece;
1745 t1 = tcg_temp_new_vec_matching(a);
1746 t2 = tcg_temp_new_vec_matching(b);
1747 tcg_gen_sari_vec(vece, t1, a, halfbits);
1748 tcg_gen_sari_vec(vece, t2, b, halfbits);
1749 tcg_gen_mul_vec(vece, t, t1, t2);
1752 static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1756 t1 = tcg_temp_new_i32();
1757 t2 = tcg_temp_new_i32();
1758 tcg_gen_sari_i32(t1, a, 16);
1759 tcg_gen_sari_i32(t2, b, 16);
1760 tcg_gen_mul_i32(t, t1, t2);
1763 static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1767 t1 = tcg_temp_new_i64();
1768 t2 = tcg_temp_new_i64();
1769 tcg_gen_sari_i64(t1, a, 32);
1770 tcg_gen_sari_i64(t2, b, 32);
1771 tcg_gen_mul_i64(t, t1, t2);
1774 static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1775 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1777 static const TCGOpcode vecop_list[] = {
1778 INDEX_op_sari_vec, INDEX_op_mul_vec, 0
1780 static const GVecGen3 op[3] = {
1782 .fniv = gen_vmulwod_s,
1783 .fno = gen_helper_vmulwod_h_b,
1784 .opt_opc = vecop_list,
1788 .fni4 = gen_vmulwod_w_h,
1789 .fniv = gen_vmulwod_s,
1790 .fno = gen_helper_vmulwod_w_h,
1791 .opt_opc = vecop_list,
1795 .fni8 = gen_vmulwod_d_w,
1796 .fniv = gen_vmulwod_s,
1797 .fno = gen_helper_vmulwod_d_w,
1798 .opt_opc = vecop_list,
1803 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1806 TRANS(vmulwod_h_b, gvec_vvv, MO_8, do_vmulwod_s)
1807 TRANS(vmulwod_w_h, gvec_vvv, MO_16, do_vmulwod_s)
1808 TRANS(vmulwod_d_w, gvec_vvv, MO_32, do_vmulwod_s)
1810 static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1812 TCGv_vec t1, t2, mask;
1814 t1 = tcg_temp_new_vec_matching(a);
1815 t2 = tcg_temp_new_vec_matching(b);
1816 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
1817 tcg_gen_and_vec(vece, t1, a, mask);
1818 tcg_gen_and_vec(vece, t2, b, mask);
1819 tcg_gen_mul_vec(vece, t, t1, t2);
1822 static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1826 t1 = tcg_temp_new_i32();
1827 t2 = tcg_temp_new_i32();
1828 tcg_gen_ext16u_i32(t1, a);
1829 tcg_gen_ext16u_i32(t2, b);
1830 tcg_gen_mul_i32(t, t1, t2);
1833 static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1837 t1 = tcg_temp_new_i64();
1838 t2 = tcg_temp_new_i64();
1839 tcg_gen_ext32u_i64(t1, a);
1840 tcg_gen_ext32u_i64(t2, b);
1841 tcg_gen_mul_i64(t, t1, t2);
1844 static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1845 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1847 static const TCGOpcode vecop_list[] = {
1850 static const GVecGen3 op[3] = {
1852 .fniv = gen_vmulwev_u,
1853 .fno = gen_helper_vmulwev_h_bu,
1854 .opt_opc = vecop_list,
1858 .fni4 = gen_vmulwev_w_hu,
1859 .fniv = gen_vmulwev_u,
1860 .fno = gen_helper_vmulwev_w_hu,
1861 .opt_opc = vecop_list,
1865 .fni8 = gen_vmulwev_d_wu,
1866 .fniv = gen_vmulwev_u,
1867 .fno = gen_helper_vmulwev_d_wu,
1868 .opt_opc = vecop_list,
1873 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1876 TRANS(vmulwev_h_bu, gvec_vvv, MO_8, do_vmulwev_u)
1877 TRANS(vmulwev_w_hu, gvec_vvv, MO_16, do_vmulwev_u)
1878 TRANS(vmulwev_d_wu, gvec_vvv, MO_32, do_vmulwev_u)
1880 static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1883 int halfbits = 4 << vece;
1885 t1 = tcg_temp_new_vec_matching(a);
1886 t2 = tcg_temp_new_vec_matching(b);
1887 tcg_gen_shri_vec(vece, t1, a, halfbits);
1888 tcg_gen_shri_vec(vece, t2, b, halfbits);
1889 tcg_gen_mul_vec(vece, t, t1, t2);
1892 static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1896 t1 = tcg_temp_new_i32();
1897 t2 = tcg_temp_new_i32();
1898 tcg_gen_shri_i32(t1, a, 16);
1899 tcg_gen_shri_i32(t2, b, 16);
1900 tcg_gen_mul_i32(t, t1, t2);
1903 static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1907 t1 = tcg_temp_new_i64();
1908 t2 = tcg_temp_new_i64();
1909 tcg_gen_shri_i64(t1, a, 32);
1910 tcg_gen_shri_i64(t2, b, 32);
1911 tcg_gen_mul_i64(t, t1, t2);
1914 static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1915 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1917 static const TCGOpcode vecop_list[] = {
1918 INDEX_op_shri_vec, INDEX_op_mul_vec, 0
1920 static const GVecGen3 op[3] = {
1922 .fniv = gen_vmulwod_u,
1923 .fno = gen_helper_vmulwod_h_bu,
1924 .opt_opc = vecop_list,
1928 .fni4 = gen_vmulwod_w_hu,
1929 .fniv = gen_vmulwod_u,
1930 .fno = gen_helper_vmulwod_w_hu,
1931 .opt_opc = vecop_list,
1935 .fni8 = gen_vmulwod_d_wu,
1936 .fniv = gen_vmulwod_u,
1937 .fno = gen_helper_vmulwod_d_wu,
1938 .opt_opc = vecop_list,
1943 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
1946 TRANS(vmulwod_h_bu, gvec_vvv, MO_8, do_vmulwod_u)
1947 TRANS(vmulwod_w_hu, gvec_vvv, MO_16, do_vmulwod_u)
1948 TRANS(vmulwod_d_wu, gvec_vvv, MO_32, do_vmulwod_u)
1950 static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
1952 TCGv_vec t1, t2, mask;
1953 int halfbits = 4 << vece;
1955 t1 = tcg_temp_new_vec_matching(a);
1956 t2 = tcg_temp_new_vec_matching(b);
1957 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
1958 tcg_gen_and_vec(vece, t1, a, mask);
1959 tcg_gen_shli_vec(vece, t2, b, halfbits);
1960 tcg_gen_sari_vec(vece, t2, t2, halfbits);
1961 tcg_gen_mul_vec(vece, t, t1, t2);
1964 static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
1968 t1 = tcg_temp_new_i32();
1969 t2 = tcg_temp_new_i32();
1970 tcg_gen_ext16u_i32(t1, a);
1971 tcg_gen_ext16s_i32(t2, b);
1972 tcg_gen_mul_i32(t, t1, t2);
1975 static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
1979 t1 = tcg_temp_new_i64();
1980 t2 = tcg_temp_new_i64();
1981 tcg_gen_ext32u_i64(t1, a);
1982 tcg_gen_ext32s_i64(t2, b);
1983 tcg_gen_mul_i64(t, t1, t2);
1986 static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
1987 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
1989 static const TCGOpcode vecop_list[] = {
1990 INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
1992 static const GVecGen3 op[3] = {
1994 .fniv = gen_vmulwev_u_s,
1995 .fno = gen_helper_vmulwev_h_bu_b,
1996 .opt_opc = vecop_list,
2000 .fni4 = gen_vmulwev_w_hu_h,
2001 .fniv = gen_vmulwev_u_s,
2002 .fno = gen_helper_vmulwev_w_hu_h,
2003 .opt_opc = vecop_list,
2007 .fni8 = gen_vmulwev_d_wu_w,
2008 .fniv = gen_vmulwev_u_s,
2009 .fno = gen_helper_vmulwev_d_wu_w,
2010 .opt_opc = vecop_list,
2015 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2018 TRANS(vmulwev_h_bu_b, gvec_vvv, MO_8, do_vmulwev_u_s)
2019 TRANS(vmulwev_w_hu_h, gvec_vvv, MO_16, do_vmulwev_u_s)
2020 TRANS(vmulwev_d_wu_w, gvec_vvv, MO_32, do_vmulwev_u_s)
2022 static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2025 int halfbits = 4 << vece;
2027 t1 = tcg_temp_new_vec_matching(a);
2028 t2 = tcg_temp_new_vec_matching(b);
2029 tcg_gen_shri_vec(vece, t1, a, halfbits);
2030 tcg_gen_sari_vec(vece, t2, b, halfbits);
2031 tcg_gen_mul_vec(vece, t, t1, t2);
2034 static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2038 t1 = tcg_temp_new_i32();
2039 t2 = tcg_temp_new_i32();
2040 tcg_gen_shri_i32(t1, a, 16);
2041 tcg_gen_sari_i32(t2, b, 16);
2042 tcg_gen_mul_i32(t, t1, t2);
2044 static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2048 t1 = tcg_temp_new_i64();
2049 t2 = tcg_temp_new_i64();
2050 tcg_gen_shri_i64(t1, a, 32);
2051 tcg_gen_sari_i64(t2, b, 32);
2052 tcg_gen_mul_i64(t, t1, t2);
2055 static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2056 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2058 static const TCGOpcode vecop_list[] = {
2059 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
2061 static const GVecGen3 op[3] = {
2063 .fniv = gen_vmulwod_u_s,
2064 .fno = gen_helper_vmulwod_h_bu_b,
2065 .opt_opc = vecop_list,
2069 .fni4 = gen_vmulwod_w_hu_h,
2070 .fniv = gen_vmulwod_u_s,
2071 .fno = gen_helper_vmulwod_w_hu_h,
2072 .opt_opc = vecop_list,
2076 .fni8 = gen_vmulwod_d_wu_w,
2077 .fniv = gen_vmulwod_u_s,
2078 .fno = gen_helper_vmulwod_d_wu_w,
2079 .opt_opc = vecop_list,
2084 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2087 TRANS(vmulwod_h_bu_b, gvec_vvv, MO_8, do_vmulwod_u_s)
2088 TRANS(vmulwod_w_hu_h, gvec_vvv, MO_16, do_vmulwod_u_s)
2089 TRANS(vmulwod_d_wu_w, gvec_vvv, MO_32, do_vmulwod_u_s)
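/*
 * vmadd and vmsub accumulate into vd: the destination is read as a third
 * source and the product of vj and vk is added to or subtracted from it.
 */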
2091 static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2095 t1 = tcg_temp_new_vec_matching(t);
2096 tcg_gen_mul_vec(vece, t1, a, b);
2097 tcg_gen_add_vec(vece, t, t, t1);
2100 static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2104 t1 = tcg_temp_new_i32();
2105 tcg_gen_mul_i32(t1, a, b);
2106 tcg_gen_add_i32(t, t, t1);
2109 static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2113 t1 = tcg_temp_new_i64();
2114 tcg_gen_mul_i64(t1, a, b);
2115 tcg_gen_add_i64(t, t, t1);
2118 static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2119 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2121 static const TCGOpcode vecop_list[] = {
2122 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2124 static const GVecGen3 op[4] = {
2127 .fno = gen_helper_vmadd_b,
2129 .opt_opc = vecop_list,
2134 .fno = gen_helper_vmadd_h,
2136 .opt_opc = vecop_list,
2140 .fni4 = gen_vmadd_w,
2142 .fno = gen_helper_vmadd_w,
2144 .opt_opc = vecop_list,
2148 .fni8 = gen_vmadd_d,
2150 .fno = gen_helper_vmadd_d,
2152 .opt_opc = vecop_list,
2157 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2160 TRANS(vmadd_b, gvec_vvv, MO_8, do_vmadd)
2161 TRANS(vmadd_h, gvec_vvv, MO_16, do_vmadd)
2162 TRANS(vmadd_w, gvec_vvv, MO_32, do_vmadd)
2163 TRANS(vmadd_d, gvec_vvv, MO_64, do_vmadd)
2165 static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2169 t1 = tcg_temp_new_vec_matching(t);
2170 tcg_gen_mul_vec(vece, t1, a, b);
2171 tcg_gen_sub_vec(vece, t, t, t1);
2174 static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2178 t1 = tcg_temp_new_i32();
2179 tcg_gen_mul_i32(t1, a, b);
2180 tcg_gen_sub_i32(t, t, t1);
2183 static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2187 t1 = tcg_temp_new_i64();
2188 tcg_gen_mul_i64(t1, a, b);
2189 tcg_gen_sub_i64(t, t, t1);
2192 static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2193 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2195 static const TCGOpcode vecop_list[] = {
2196 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
2198 static const GVecGen3 op[4] = {
2201 .fno = gen_helper_vmsub_b,
2203 .opt_opc = vecop_list,
2208 .fno = gen_helper_vmsub_h,
2210 .opt_opc = vecop_list,
2214 .fni4 = gen_vmsub_w,
2216 .fno = gen_helper_vmsub_w,
2218 .opt_opc = vecop_list,
2222 .fni8 = gen_vmsub_d,
2224 .fno = gen_helper_vmsub_d,
2226 .opt_opc = vecop_list,
2231 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2234 TRANS(vmsub_b, gvec_vvv, MO_8, do_vmsub)
2235 TRANS(vmsub_h, gvec_vvv, MO_16, do_vmsub)
2236 TRANS(vmsub_w, gvec_vvv, MO_32, do_vmsub)
2237 TRANS(vmsub_d, gvec_vvv, MO_64, do_vmsub)
2239 static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2241 TCGv_vec t1, t2, t3;
2242 int halfbits = 4 << vece;
2244 t1 = tcg_temp_new_vec_matching(a);
2245 t2 = tcg_temp_new_vec_matching(b);
2246 t3 = tcg_temp_new_vec_matching(t);
2247 tcg_gen_shli_vec(vece, t1, a, halfbits);
2248 tcg_gen_sari_vec(vece, t1, t1, halfbits);
2249 tcg_gen_shli_vec(vece, t2, b, halfbits);
2250 tcg_gen_sari_vec(vece, t2, t2, halfbits);
2251 tcg_gen_mul_vec(vece, t3, t1, t2);
2252 tcg_gen_add_vec(vece, t, t, t3);
2255 static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2259 t1 = tcg_temp_new_i32();
2260 gen_vmulwev_w_h(t1, a, b);
2261 tcg_gen_add_i32(t, t, t1);
2264 static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2268 t1 = tcg_temp_new_i64();
2269 gen_vmulwev_d_w(t1, a, b);
2270 tcg_gen_add_i64(t, t, t1);
2273 static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2274 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2276 static const TCGOpcode vecop_list[] = {
2277 INDEX_op_shli_vec, INDEX_op_sari_vec,
2278 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2280 static const GVecGen3 op[3] = {
2282 .fniv = gen_vmaddwev_s,
2283 .fno = gen_helper_vmaddwev_h_b,
2285 .opt_opc = vecop_list,
2289 .fni4 = gen_vmaddwev_w_h,
2290 .fniv = gen_vmaddwev_s,
2291 .fno = gen_helper_vmaddwev_w_h,
2293 .opt_opc = vecop_list,
2297 .fni8 = gen_vmaddwev_d_w,
2298 .fniv = gen_vmaddwev_s,
2299 .fno = gen_helper_vmaddwev_d_w,
2301 .opt_opc = vecop_list,
2306 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2309 TRANS(vmaddwev_h_b, gvec_vvv, MO_8, do_vmaddwev_s)
2310 TRANS(vmaddwev_w_h, gvec_vvv, MO_16, do_vmaddwev_s)
2311 TRANS(vmaddwev_d_w, gvec_vvv, MO_32, do_vmaddwev_s)
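/*
 * The 128-bit accumulating forms multiply one doubleword lane of vj and
 * vk (even or odd) into a 128-bit product and add it to the 128-bit value
 * held in vd, using add2 to propagate the carry between the two halves.
 */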
2313 #define VMADD_Q(NAME, FN, idx1, idx2) \
2314 static bool trans_## NAME (DisasContext *ctx, arg_vvv *a) \
2316 TCGv_i64 rh, rl, arg1, arg2, th, tl; \
2318 rh = tcg_temp_new_i64(); \
2319 rl = tcg_temp_new_i64(); \
2320 arg1 = tcg_temp_new_i64(); \
2321 arg2 = tcg_temp_new_i64(); \
2322 th = tcg_temp_new_i64(); \
2323 tl = tcg_temp_new_i64(); \
2325 get_vreg64(arg1, a->vj, idx1); \
2326 get_vreg64(arg2, a->vk, idx2); \
2327 get_vreg64(rh, a->vd, 1); \
2328 get_vreg64(rl, a->vd, 0); \
2330 tcg_gen_## FN ##_i64(tl, th, arg1, arg2); \
2331 tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); \
2333 set_vreg64(rh, a->vd, 1); \
2334 set_vreg64(rl, a->vd, 0); \
2339 VMADD_Q(vmaddwev_q_d, muls2, 0, 0)
2340 VMADD_Q(vmaddwod_q_d, muls2, 1, 1)
2341 VMADD_Q(vmaddwev_q_du, mulu2, 0, 0)
2342 VMADD_Q(vmaddwod_q_du, mulu2, 1, 1)
2343 VMADD_Q(vmaddwev_q_du_d, mulus2, 0, 0)
2344 VMADD_Q(vmaddwod_q_du_d, mulus2, 1, 1)
2346 static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2348 TCGv_vec t1, t2, t3;
2349 int halfbits = 4 << vece;
2351 t1 = tcg_temp_new_vec_matching(a);
2352 t2 = tcg_temp_new_vec_matching(b);
2353 t3 = tcg_temp_new_vec_matching(t);
2354 tcg_gen_sari_vec(vece, t1, a, halfbits);
2355 tcg_gen_sari_vec(vece, t2, b, halfbits);
2356 tcg_gen_mul_vec(vece, t3, t1, t2);
2357 tcg_gen_add_vec(vece, t, t, t3);
2360 static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2364 t1 = tcg_temp_new_i32();
2365 gen_vmulwod_w_h(t1, a, b);
2366 tcg_gen_add_i32(t, t, t1);
2369 static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2373 t1 = tcg_temp_new_i64();
2374 gen_vmulwod_d_w(t1, a, b);
2375 tcg_gen_add_i64(t, t, t1);
2378 static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2379 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2381 static const TCGOpcode vecop_list[] = {
2382 INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
2384 static const GVecGen3 op[3] = {
2386 .fniv = gen_vmaddwod_s,
2387 .fno = gen_helper_vmaddwod_h_b,
2389 .opt_opc = vecop_list,
2393 .fni4 = gen_vmaddwod_w_h,
2394 .fniv = gen_vmaddwod_s,
2395 .fno = gen_helper_vmaddwod_w_h,
2397 .opt_opc = vecop_list,
2401 .fni8 = gen_vmaddwod_d_w,
2402 .fniv = gen_vmaddwod_s,
2403 .fno = gen_helper_vmaddwod_d_w,
2405 .opt_opc = vecop_list,
2410 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
2413 TRANS(vmaddwod_h_b, gvec_vvv, MO_8, do_vmaddwod_s)
2414 TRANS(vmaddwod_w_h, gvec_vvv, MO_16, do_vmaddwod_s)
2415 TRANS(vmaddwod_d_w, gvec_vvv, MO_32, do_vmaddwod_s)
2417 static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
2419 TCGv_vec t1, t2, mask;
2421 t1 = tcg_temp_new_vec_matching(t);
2422 t2 = tcg_temp_new_vec_matching(b);
2423 mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
2424 tcg_gen_and_vec(vece, t1, a, mask);
2425 tcg_gen_and_vec(vece, t2, b, mask);
2426 tcg_gen_mul_vec(vece, t1, t1, t2);
2427 tcg_gen_add_vec(vece, t, t, t1);
2430 static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
2434 t1 = tcg_temp_new_i32();
2435 gen_vmulwev_w_hu(t1, a, b);
2436 tcg_gen_add_i32(t, t, t1);
2439 static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
2443 t1 = tcg_temp_new_i64();
2444 gen_vmulwev_d_wu(t1, a, b);
2445 tcg_gen_add_i64(t, t, t1);
2448 static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
2449 uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
2451 static const TCGOpcode vecop_list[] = {
2452 INDEX_op_mul_vec, INDEX_op_add_vec, 0
2454 static const GVecGen3 op[3] = {
2456 .fniv = gen_vmaddwev_u,
2457 .fno = gen_helper_vmaddwev_h_bu,
2459 .opt_opc = vecop_list,
2463 .fni4 = gen_vmaddwev_w_hu,
2464 .fniv = gen_vmaddwev_u,
2465 .fno = gen_helper_vmaddwev_w_hu,
2467 .opt_opc = vecop_list,
2471 .fni8 = gen_vmaddwev_d_wu,
2472 .fniv = gen_vmaddwev_u,
2473 .fno = gen_helper_vmaddwev_d_wu,
2475 .opt_opc = vecop_list,
2480 tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
TRANS(vmaddwev_h_bu, gvec_vvv, MO_8, do_vmaddwev_u)
TRANS(vmaddwev_w_hu, gvec_vvv, MO_16, do_vmaddwev_u)
TRANS(vmaddwev_d_wu, gvec_vvv, MO_32, do_vmaddwev_u)

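/*
 * Odd-position unsigned variant: a logical shift right by half the element
 * width zero-extends the high (odd) half of each element.
 */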
static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2, t3;
    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);
    t3 = tcg_temp_new_vec_matching(t);
    tcg_gen_shri_vec(vece, t1, a, halfbits);
    tcg_gen_shri_vec(vece, t2, b, halfbits);
    tcg_gen_mul_vec(vece, t3, t1, t2);
    tcg_gen_add_vec(vece, t, t, t3);
}

static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t1;

    t1 = tcg_temp_new_i32();
    gen_vmulwod_w_hu(t1, a, b);
    tcg_gen_add_i32(t, t, t1);
}

static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t1;

    t1 = tcg_temp_new_i64();
    gen_vmulwod_d_wu(t1, a, b);
    tcg_gen_add_i64(t, t, t1);
}

static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                          uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_h_bu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwod_w_hu,
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_w_hu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwod_d_wu,
            .fniv = gen_vmaddwod_u,
            .fno = gen_helper_vmaddwod_d_wu,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwod_h_bu, gvec_vvv, MO_8, do_vmaddwod_u)
TRANS(vmaddwod_w_hu, gvec_vvv, MO_16, do_vmaddwod_u)
TRANS(vmaddwod_d_wu, gvec_vvv, MO_32, do_vmaddwod_u)

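/*
 * Even-position unsigned * signed variant: the even half of a (vj) is
 * zero-extended with a mask, while the even half of b (vk) is sign-extended
 * by shifting it to the top half and arithmetically shifting it back down.
 */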
static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2, mask;
    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);
    mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
    tcg_gen_and_vec(vece, t1, a, mask);
    tcg_gen_shli_vec(vece, t2, b, halfbits);
    tcg_gen_sari_vec(vece, t2, t2, halfbits);
    tcg_gen_mul_vec(vece, t1, t1, t2);
    tcg_gen_add_vec(vece, t, t, t1);
}

static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t1;

    t1 = tcg_temp_new_i32();
    gen_vmulwev_w_hu_h(t1, a, b);
    tcg_gen_add_i32(t, t, t1);
}

static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t1;

    t1 = tcg_temp_new_i64();
    gen_vmulwev_d_wu_w(t1, a, b);
    tcg_gen_add_i64(t, t, t1);
}

static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_h_bu_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwev_w_hu_h,
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_w_hu_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwev_d_wu_w,
            .fniv = gen_vmaddwev_u_s,
            .fno = gen_helper_vmaddwev_d_wu_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwev_h_bu_b, gvec_vvv, MO_8, do_vmaddwev_u_s)
TRANS(vmaddwev_w_hu_h, gvec_vvv, MO_16, do_vmaddwev_u_s)
TRANS(vmaddwev_d_wu_w, gvec_vvv, MO_32, do_vmaddwev_u_s)

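/*
 * Odd-position unsigned * signed variant: the odd half of a is zero-extended
 * with a logical shift, the odd half of b sign-extended with an arithmetic
 * shift.
 */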
static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, t2, t3;
    int halfbits = 4 << vece;

    t1 = tcg_temp_new_vec_matching(a);
    t2 = tcg_temp_new_vec_matching(b);
    t3 = tcg_temp_new_vec_matching(t);
    tcg_gen_shri_vec(vece, t1, a, halfbits);
    tcg_gen_sari_vec(vece, t2, b, halfbits);
    tcg_gen_mul_vec(vece, t3, t1, t2);
    tcg_gen_add_vec(vece, t, t, t3);
}

static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t1;

    t1 = tcg_temp_new_i32();
    gen_vmulwod_w_hu_h(t1, a, b);
    tcg_gen_add_i32(t, t, t1);
}

static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t1;

    t1 = tcg_temp_new_i64();
    gen_vmulwod_d_wu_w(t1, a, b);
    tcg_gen_add_i64(t, t, t1);
}

static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                            uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec,
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 op[3] = {
        {
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_h_bu_b,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fni4 = gen_vmaddwod_w_hu_h,
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_w_hu_h,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fni8 = gen_vmaddwod_d_wu_w,
            .fniv = gen_vmaddwod_u_s,
            .fno = gen_helper_vmaddwod_d_wu_w,
            .load_dest = true,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vmaddwod_h_bu_b, gvec_vvv, MO_8, do_vmaddwod_u_s)
TRANS(vmaddwod_w_hu_h, gvec_vvv, MO_16, do_vmaddwod_u_s)
TRANS(vmaddwod_d_wu_w, gvec_vvv, MO_32, do_vmaddwod_u_s)

TRANS(vdiv_b, gen_vvv, gen_helper_vdiv_b)
TRANS(vdiv_h, gen_vvv, gen_helper_vdiv_h)
TRANS(vdiv_w, gen_vvv, gen_helper_vdiv_w)
TRANS(vdiv_d, gen_vvv, gen_helper_vdiv_d)
TRANS(vdiv_bu, gen_vvv, gen_helper_vdiv_bu)
TRANS(vdiv_hu, gen_vvv, gen_helper_vdiv_hu)
TRANS(vdiv_wu, gen_vvv, gen_helper_vdiv_wu)
TRANS(vdiv_du, gen_vvv, gen_helper_vdiv_du)
TRANS(vmod_b, gen_vvv, gen_helper_vmod_b)
TRANS(vmod_h, gen_vvv, gen_helper_vmod_h)
TRANS(vmod_w, gen_vvv, gen_helper_vmod_w)
TRANS(vmod_d, gen_vvv, gen_helper_vmod_d)
TRANS(vmod_bu, gen_vvv, gen_helper_vmod_bu)
TRANS(vmod_hu, gen_vvv, gen_helper_vmod_hu)
TRANS(vmod_wu, gen_vvv, gen_helper_vmod_wu)
TRANS(vmod_du, gen_vvv, gen_helper_vmod_du)

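/*
 * Signed saturation: clamp each element to [~max, max], where the caller
 * passes max = (1 << imm) - 1, using a smax against ~max followed by a
 * smin against max.
 */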
static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
    TCGv_vec min;

    min = tcg_temp_new_vec_matching(t);
    tcg_gen_not_vec(vece, min, max);
    tcg_gen_smax_vec(vece, t, a, min);
    tcg_gen_smin_vec(vece, t, t, max);
}

static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2s op[4] = {
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsat_s,
            .fno = gen_helper_vsat_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
                    tcg_constant_i64((1ll << imm) - 1), &op[vece]);
}

TRANS(vsat_b, gvec_vv_i, MO_8, do_vsat_s)
TRANS(vsat_h, gvec_vv_i, MO_16, do_vsat_s)
TRANS(vsat_w, gvec_vv_i, MO_32, do_vsat_s)
TRANS(vsat_d, gvec_vv_i, MO_64, do_vsat_s)

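/*
 * Unsigned saturation needs only an unsigned minimum against the upper
 * bound, (1 << (imm + 1)) - 1 (or UINT64_MAX when imm == 0x3f).
 */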
static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
{
    tcg_gen_umin_vec(vece, t, a, max);
}

static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                      int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    uint64_t max;
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umin_vec, 0
    };
    static const GVecGen2s op[4] = {
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_bu,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_hu,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_wu,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsat_u,
            .fno = gen_helper_vsat_du,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
    tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
                    tcg_constant_i64(max), &op[vece]);
}

TRANS(vsat_bu, gvec_vv_i, MO_8, do_vsat_u)
TRANS(vsat_hu, gvec_vv_i, MO_16, do_vsat_u)
TRANS(vsat_wu, gvec_vv_i, MO_32, do_vsat_u)
TRANS(vsat_du, gvec_vv_i, MO_64, do_vsat_u)

TRANS(vexth_h_b, gen_vv, gen_helper_vexth_h_b)
TRANS(vexth_w_h, gen_vv, gen_helper_vexth_w_h)
TRANS(vexth_d_w, gen_vv, gen_helper_vexth_d_w)
TRANS(vexth_q_d, gen_vv, gen_helper_vexth_q_d)
TRANS(vexth_hu_bu, gen_vv, gen_helper_vexth_hu_bu)
TRANS(vexth_wu_hu, gen_vv, gen_helper_vexth_wu_hu)
TRANS(vexth_du_wu, gen_vv, gen_helper_vexth_du_wu)
TRANS(vexth_qu_du, gen_vv, gen_helper_vexth_qu_du)

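/*
 * VSIGNCOV: vd = 0 when the element of vj is zero, -vk when it is negative,
 * and vk otherwise; implemented with a negate and two cmpsel.
 */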
static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t1, zero;

    t1 = tcg_temp_new_vec_matching(t);
    zero = tcg_constant_vec_matching(t, vece, 0);

    tcg_gen_neg_vec(vece, t1, b);
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
    tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
}

static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                        uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 op[4] = {
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_b,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_h,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_w,
            .opt_opc = vecop_list,
            .vece = MO_32
        },
        {
            .fniv = gen_vsigncov,
            .fno = gen_helper_vsigncov_d,
            .opt_opc = vecop_list,
            .vece = MO_64
        },
    };

    tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
}

TRANS(vsigncov_b, gvec_vvv, MO_8, do_vsigncov)
TRANS(vsigncov_h, gvec_vvv, MO_16, do_vsigncov)
TRANS(vsigncov_w, gvec_vvv, MO_32, do_vsigncov)
TRANS(vsigncov_d, gvec_vvv, MO_64, do_vsigncov)

TRANS(vmskltz_b, gen_vv, gen_helper_vmskltz_b)
TRANS(vmskltz_h, gen_vv, gen_helper_vmskltz_h)
TRANS(vmskltz_w, gen_vv, gen_helper_vmskltz_w)
TRANS(vmskltz_d, gen_vv, gen_helper_vmskltz_d)
TRANS(vmskgez_b, gen_vv, gen_helper_vmskgez_b)
TRANS(vmsknz_b, gen_vv, gen_helper_vmsknz_b)

TRANS(vand_v, gvec_vvv, MO_64, tcg_gen_gvec_and)
TRANS(vor_v, gvec_vvv, MO_64, tcg_gen_gvec_or)
TRANS(vxor_v, gvec_vvv, MO_64, tcg_gen_gvec_xor)
TRANS(vnor_v, gvec_vvv, MO_64, tcg_gen_gvec_nor)

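/*
 * VANDN.V computes vd = vk & ~vj, so the gvec andc expansion (which
 * computes a & ~b) is passed vk as the first source and vj as the second.
 */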
static bool trans_vandn_v(DisasContext *ctx, arg_vvv *a)
{
    uint32_t vd_ofs, vj_ofs, vk_ofs;

    CHECK_SXE;

    vd_ofs = vec_full_offset(a->vd);
    vj_ofs = vec_full_offset(a->vj);
    vk_ofs = vec_full_offset(a->vk);

    tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, 16, ctx->vl/8);
    return true;
}

TRANS(vorn_v, gvec_vvv, MO_64, tcg_gen_gvec_orc)
TRANS(vandi_b, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
TRANS(vori_b, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
TRANS(vxori_b, gvec_vv_i, MO_8, tcg_gen_gvec_xori)

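/* VNORI.B: vd = ~(vj | imm), with the 8-bit immediate replicated per byte. */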
static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
{
    TCGv_vec t1;

    t1 = tcg_constant_vec_matching(t, vece, imm);
    tcg_gen_nor_vec(vece, t, a, t1);
}

static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
{
    tcg_gen_movi_i64(t, dup_const(MO_8, imm));
    tcg_gen_nor_i64(t, a, t);
}

static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
                       int64_t imm, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_nor_vec, 0
    };
    static const GVecGen2i op = {
        .fni8 = gen_vnori_b,
        .fniv = gen_vnori,
        .fnoi = gen_helper_vnori_b,
        .opt_opc = vecop_list,
        .vece = MO_8
    };

    tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
}

TRANS(vnori_b, gvec_vv_i, MO_8, do_vnori_b)

TRANS(vsll_b, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
TRANS(vsll_h, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
TRANS(vsll_w, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
TRANS(vsll_d, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
TRANS(vslli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
TRANS(vslli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
TRANS(vslli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
TRANS(vslli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shli)

TRANS(vsrl_b, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
TRANS(vsrl_h, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
TRANS(vsrl_w, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
TRANS(vsrl_d, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
TRANS(vsrli_b, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
TRANS(vsrli_h, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
TRANS(vsrli_w, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
TRANS(vsrli_d, gvec_vv_i, MO_64, tcg_gen_gvec_shri)

TRANS(vsra_b, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
TRANS(vsra_h, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
TRANS(vsra_w, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
TRANS(vsra_d, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
TRANS(vsrai_b, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
TRANS(vsrai_h, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
TRANS(vsrai_w, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
TRANS(vsrai_d, gvec_vv_i, MO_64, tcg_gen_gvec_sari)

TRANS(vrotr_b, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
TRANS(vrotr_h, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
TRANS(vrotr_w, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
TRANS(vrotr_d, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
TRANS(vrotri_b, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
TRANS(vrotri_h, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
TRANS(vrotri_w, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
TRANS(vrotri_d, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)

TRANS(vsllwil_h_b, gen_vv_i, gen_helper_vsllwil_h_b)
TRANS(vsllwil_w_h, gen_vv_i, gen_helper_vsllwil_w_h)
TRANS(vsllwil_d_w, gen_vv_i, gen_helper_vsllwil_d_w)
TRANS(vextl_q_d, gen_vv, gen_helper_vextl_q_d)
TRANS(vsllwil_hu_bu, gen_vv_i, gen_helper_vsllwil_hu_bu)
TRANS(vsllwil_wu_hu, gen_vv_i, gen_helper_vsllwil_wu_hu)
TRANS(vsllwil_du_wu, gen_vv_i, gen_helper_vsllwil_du_wu)
TRANS(vextl_qu_du, gen_vv, gen_helper_vextl_qu_du)

TRANS(vsrlr_b, gen_vvv, gen_helper_vsrlr_b)
TRANS(vsrlr_h, gen_vvv, gen_helper_vsrlr_h)
TRANS(vsrlr_w, gen_vvv, gen_helper_vsrlr_w)
TRANS(vsrlr_d, gen_vvv, gen_helper_vsrlr_d)
TRANS(vsrlri_b, gen_vv_i, gen_helper_vsrlri_b)
TRANS(vsrlri_h, gen_vv_i, gen_helper_vsrlri_h)
TRANS(vsrlri_w, gen_vv_i, gen_helper_vsrlri_w)
TRANS(vsrlri_d, gen_vv_i, gen_helper_vsrlri_d)

TRANS(vsrar_b, gen_vvv, gen_helper_vsrar_b)
TRANS(vsrar_h, gen_vvv, gen_helper_vsrar_h)
TRANS(vsrar_w, gen_vvv, gen_helper_vsrar_w)
TRANS(vsrar_d, gen_vvv, gen_helper_vsrar_d)
TRANS(vsrari_b, gen_vv_i, gen_helper_vsrari_b)
TRANS(vsrari_h, gen_vv_i, gen_helper_vsrari_h)
TRANS(vsrari_w, gen_vv_i, gen_helper_vsrari_w)
TRANS(vsrari_d, gen_vv_i, gen_helper_vsrari_d)

TRANS(vsrln_b_h, gen_vvv, gen_helper_vsrln_b_h)
TRANS(vsrln_h_w, gen_vvv, gen_helper_vsrln_h_w)
TRANS(vsrln_w_d, gen_vvv, gen_helper_vsrln_w_d)
TRANS(vsran_b_h, gen_vvv, gen_helper_vsran_b_h)
TRANS(vsran_h_w, gen_vvv, gen_helper_vsran_h_w)
TRANS(vsran_w_d, gen_vvv, gen_helper_vsran_w_d)

TRANS(vsrlni_b_h, gen_vv_i, gen_helper_vsrlni_b_h)
TRANS(vsrlni_h_w, gen_vv_i, gen_helper_vsrlni_h_w)
TRANS(vsrlni_w_d, gen_vv_i, gen_helper_vsrlni_w_d)
TRANS(vsrlni_d_q, gen_vv_i, gen_helper_vsrlni_d_q)
TRANS(vsrani_b_h, gen_vv_i, gen_helper_vsrani_b_h)
TRANS(vsrani_h_w, gen_vv_i, gen_helper_vsrani_h_w)
TRANS(vsrani_w_d, gen_vv_i, gen_helper_vsrani_w_d)
TRANS(vsrani_d_q, gen_vv_i, gen_helper_vsrani_d_q)