/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
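
/*
 * Expand a three-operand gvec operation whose out-of-line helper takes
 * an extra pointer argument addressing the cumulative saturation (QC)
 * flag in CPUARMState.
 */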
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                            \
    void NAME(unsigned vece, uint32_t d, uint32_t m,    \
              uint32_t opr_sz, uint32_t max_sz)         \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0
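
/*
 * SSRA: signed shift right by immediate and accumulate, i.e.
 * d[i] += a[i] >> shift with an arithmetic shift.
 */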
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
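
/*
 * USRA: unsigned shift right by immediate and accumulate, i.e.
 * d[i] += a[i] >> shift with a logical shift.
 */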
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64, },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
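
/*
 * For example, SRSHR #2 applied to the 8-bit value 7 computes
 * ((7 >> 1) & 1) + (7 >> 2) = 1 + 1 = 2, i.e. 7/4 rounded to nearest.
 */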
static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_srshr16_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_srshr32_i32,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_srshr64_i64,
          .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
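
/*
 * SRSRA: signed rounding shift right by immediate and accumulate,
 * reusing the SRSHR expansion and adding the result into d.
 */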
static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_srsra16_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_srsra32_i32,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_srsra64_i64,
          .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
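
/*
 * URSHR: unsigned rounding shift right by immediate; the bit shifted
 * out last is added back in as the rounding increment.
 */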
static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_urshr16_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_urshr32_i32,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_urshr64_i64,
          .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
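
/*
 * URSRA: unsigned rounding shift right by immediate and accumulate.
 * A shift equal to the element size reduces to adding just the
 * rounding bit (the old most significant bit) into d.
 */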
static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fni8 = gen_ursra16_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_ursra32_i32,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_ursra64_i64,
          .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
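
/*
 * SRI: shift right by immediate and insert.  Only the low
 * (esize - shift) bits of each destination element are replaced by
 * the shifted source; the top bits of d are preserved.
 */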
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}
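
/*
 * SLI: shift left by immediate and insert.  The low shift bits of
 * each destination element are preserved; the rest are replaced by
 * the shifted source.
 */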
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64,
          .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}
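
/*
 * MLA/MLS: multiply-accumulate and multiply-subtract, i.e.
 * d += a * b and d -= a * b respectively.
 */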
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
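
/*
 * USHL/SSHL: shift each element by a signed, per-element amount held
 * in the low byte of the corresponding element of the shift operand;
 * a negative amount shifts right instead (logically for USHL,
 * arithmetically for SSHL).
 */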
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_ushl_i32,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_ushl_i64,
          .fniv = gen_ushl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sshl_i32,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sshl_i64,
          .fniv = gen_sshl_vec,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
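
/*
 * Saturating add/subtract with the QC flag: compute both the wrapping
 * and the saturating result, then OR the per-element mismatch mask
 * into the cumulative saturation vector at vfp.qc.
 */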
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_d,
          .write_aofs = true,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
    tcg_gen_or_vec(vece, sat, sat, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_32 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
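
/*
 * SABD/UABD: absolute difference, computed as max(a, b) - min(a, b)
 * in the vector case and via movcond for the scalar fallbacks.
 */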
static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_sabd_i32,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_sabd_i64,
          .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_uabd_i32,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_uabd_i64,
          .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}
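
/*
 * SABA/UABA: absolute difference and accumulate, i.e. d += |a - b|,
 * built on the SABD/UABD expansions above.
 */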
static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_saba_i32,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_saba_i64,
          .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_uaba_i32,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_uaba_i64,
          .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true,
          .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}