target/arm: Convert SQSHL and UQSHL (register) to gvec
[qemu/kevin.git] / target/arm/tcg/gengvec.c

/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"

static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                            \
    void NAME(unsigned vece, uint32_t d, uint32_t m,                    \
              uint32_t opr_sz, uint32_t max_sz)                         \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

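/*
 * Example: for MO_8 with shift == 8, the clamp above reduces the shift
 * to 7, so an input byte of 0x80 (-128) contributes 0xff (-1) to the
 * accumulator, which is the architectural "all sign bits" result for
 * SSRA by esize.
 */
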
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

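/*
 * Example: for MO_8 with shift == 8, every unsigned element shifts to
 * zero and the accumulation adds nothing; only the move above is needed
 * so that any tail beyond opr_sz is still cleared.
 */
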
/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

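/*
 * Rounding example for MO_8: srshr(0x7f, 3) computes the rounding bit
 * as (0x7f >> 2) & 1 = 1 and the shifted value as 0x7f >> 3 = 0x0f,
 * giving 0x10, the same as the architectural (0x7f + 4) >> 3.
 */
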
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

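/*
 * Example: for MO_8 with shift == 8, the rounded result is bit 7 of the
 * input (0 or 1), which is exactly what the plain shift by shift - 1 in
 * the branch above produces; no rounding add is needed in that case.
 */
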
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

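/*
 * Example: sri(d, a, 4) for MO_8 keeps the top four bits of each byte
 * of d and inserts the top four bits of a below them, i.e. the mask
 * computed above is dup_const(MO_8, 0x0f).
 */
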
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

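/*
 * Example: sli(d, a, 4) for MO_8 keeps the low four bits of each byte
 * of d and inserts the low four bits of a above them; shift == 0
 * degenerates into a plain copy of the source vector, handled above.
 */
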
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

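/*
 * Example: cmtst(0x81, 0x01) yields all ones because the operands share
 * bit 0, while cmtst(0x80, 0x01) yields zero.
 */
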
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) is biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

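/*
 * Example: for MO_8, ushl of 0x10 by a shift operand of 0xfc (-4) is a
 * right shift and yields 0x01; a shift operand of 8 or more, or -8 or
 * less, is out of range and the element becomes zero.
 */
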
void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

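/*
 * Example: for MO_8, sshl of 0x80 (-128) by a shift operand of -8 or
 * less still yields 0xff (-1): the right-shift amount is clamped to
 * esize - 1, so an out-of-range signed shift produces all sign bits
 * rather than zero.
 */
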
void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

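/*
 * Example: for MO_8, uqadd(0xf0, 0x20) saturates to 0xff while the
 * wrapped sum is 0x10; their XOR is nonzero, so nonzero bits accumulate
 * into vfp.qc, which is what later reports FPSCR.QC as set.
 */
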
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

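/*
 * The overflow test above is the usual two's-complement identity:
 * a + b overflows iff a and b have the same sign and the sum's sign
 * differs, i.e. ((t0 ^ a) & ~(a ^ b)) < 0.  The saturated value in T2
 * is INT64_MAX when a is non-negative and INT64_MIN when a is negative.
 */
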
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

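/*
 * Example: sabd(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8, the
 * same value the scalar versions above produce by selecting between
 * a - b and b - a on the signed comparison.
 */
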
void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}