/*
 * ARM generic vector expansion
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"

static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
                            uint32_t opr_sz, uint32_t max_sz,
                            gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
                       opr_sz, max_sz, 0, fn);
}

void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
    };
    tcg_debug_assert(vece >= 1 && vece <= 2);
    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}

#define GEN_CMP0(NAME, COND)                                    \
    void NAME(unsigned vece, uint32_t d, uint32_t m,            \
              uint32_t opr_sz, uint32_t max_sz)                 \
    { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }

GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)

#undef GEN_CMP0
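
/*
 * Each expansion above compares every element against zero and writes the
 * usual AdvSIMD result encoding: all ones when the condition holds for an
 * element, all zeroes otherwise (e.g. gen_gvec_ceq0 marks elements equal
 * to zero).
 */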

static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ssra8_i64, .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_b, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_ssra16_i64, .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_h, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_ssra32_i32, .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_s, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_ssra64_i64, .fniv = gen_ssra_vec,
          .fno = gen_helper_gvec_ssra_d, .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.
     */
    shift = MIN(shift, (8 << vece) - 1);
    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}

static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}

static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}

static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}

static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}

static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}

void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_usra8_i64, .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_b, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_usra16_i64, .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_h, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_usra32_i32, .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_s, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_usra64_i64, .fniv = gen_usra_vec,
          .fno = gen_helper_gvec_usra_d, .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Unsigned results in all zeros as input to accumulate: nop.
     */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

/*
 * Shift one less than the requested amount, and the low bit is
 * the rounding bit.  For the 8 and 16-bit operations, because we
 * mask the low bit, we can perform a normal integer shift instead
 * of a vector shift.
 */
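
/*
 * Worked example (illustrative): for an element value of 7 and sh == 2,
 * the rounding bit is (7 >> 1) & 1 == 1 and the truncated shift is
 * 7 >> 2 == 1, so the rounded result is 1 + 1 == 2, i.e. 7/4 rounded
 * to nearest.
 */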

static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_sar8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_sar16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
    if (sh == 32) {
        tcg_gen_movi_i32(d, 0);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_sari_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}

void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_sari_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, sh - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_sari_vec(vece, d, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srshr8_i64, .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_srshr16_i64, .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_srshr32_i32, .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_s,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_srshr64_i64, .fniv = gen_srshr_vec,
          .fno = gen_helper_gvec_srshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Signed results in all sign bits.  With rounding, this produces
         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
         * I.e. always zero.
         */
        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr8_i64(t, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr16_i64(t, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_srshr32_i32(t, a, sh);
    tcg_gen_add_i32(d, d, t);
}

static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    gen_srshr64_i64(t, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    gen_srshr_vec(vece, t, a, sh);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_srsra8_i64, .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_b,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_8 },
        { .fni8 = gen_srsra16_i64, .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_h,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_16 },
        { .fni4 = gen_srsra32_i32, .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_s,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_32 },
        { .fni8 = gen_srsra64_i64, .fniv = gen_srsra_vec,
          .fno = gen_helper_gvec_srsra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /*
     * Shifts larger than the element size are architecturally valid.
     * Signed results in all sign bits.  With rounding, this produces
     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
     * I.e. always zero.  With accumulation, this leaves D unchanged.
     */
    if (shift == (8 << vece)) {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
    tcg_gen_vec_shr8i_i64(d, a, sh);
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, sh - 1);
    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
    tcg_gen_vec_shr16i_i64(d, a, sh);
    tcg_gen_vec_add16_i64(d, d, t);
}

void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t;

    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
    if (sh == 32) {
        tcg_gen_extract_i32(d, a, sh - 1, 1);
        return;
    }
    t = tcg_temp_new_i32();
    tcg_gen_extract_i32(t, a, sh - 1, 1);
    tcg_gen_shri_i32(d, a, sh);
    tcg_gen_add_i32(d, d, t);
}
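
/*
 * For URSHR with sh equal to the element size, the unrounded result is
 * zero and the rounding bit is the element's most significant bit, which
 * is why the sh == 32 path above reduces to a single-bit extract.
 */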

void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_extract_i64(t, a, sh - 1, 1);
    tcg_gen_shri_i64(d, a, sh);
    tcg_gen_add_i64(d, d, t);
}

static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec ones = tcg_temp_new_vec_matching(d);

    tcg_gen_shri_vec(vece, t, a, shift - 1);
    tcg_gen_dupi_vec(vece, ones, 1);
    tcg_gen_and_vec(vece, t, t, ones);
    tcg_gen_shri_vec(vece, d, a, shift);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_urshr8_i64, .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_urshr16_i64, .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_urshr32_i32, .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_s,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_urshr64_i64, .fniv = gen_urshr_vec,
          .fno = gen_helper_gvec_urshr_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    if (shift == (8 << vece)) {
        /*
         * Shifts larger than the element size are architecturally valid.
         * Unsigned results in zero.  With rounding, this produces a
         * copy of the most significant bit.
         */
        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 8) {
        tcg_gen_vec_shr8i_i64(t, a, 7);
    } else {
        gen_urshr8_i64(t, a, sh);
    }
    tcg_gen_vec_add8_i64(d, d, t);
}

static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 16) {
        tcg_gen_vec_shr16i_i64(t, a, 15);
    } else {
        gen_urshr16_i64(t, a, sh);
    }
    tcg_gen_vec_add16_i64(d, d, t);
}

static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
    TCGv_i32 t = tcg_temp_new_i32();

    if (sh == 32) {
        tcg_gen_shri_i32(t, a, 31);
    } else {
        gen_urshr32_i32(t, a, sh);
    }
    tcg_gen_add_i32(d, d, t);
}

static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
    TCGv_i64 t = tcg_temp_new_i64();

    if (sh == 64) {
        tcg_gen_shri_i64(t, a, 63);
    } else {
        gen_urshr64_i64(t, a, sh);
    }
    tcg_gen_add_i64(d, d, t);
}

static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    if (sh == (8 << vece)) {
        tcg_gen_shri_vec(vece, t, a, sh - 1);
    } else {
        gen_urshr_vec(vece, t, a, sh);
    }
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_shri_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen2i ops[4] = {
        { .fni8 = gen_ursra8_i64, .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_b,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_8 },
        { .fni8 = gen_ursra16_i64, .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_h,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_16 },
        { .fni4 = gen_ursra32_i32, .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_s,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_32 },
        { .fni8 = gen_ursra64_i64, .fniv = gen_ursra_vec,
          .fno = gen_helper_gvec_ursra_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize] */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
}
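
/*
 * SRI (shift right and insert): the shifted source is inserted into the
 * destination, leaving the top 'shift' bits of each destination element
 * unchanged; a shift by the full element size therefore leaves the
 * destination untouched.
 */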

static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shri_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}

static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}

static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
    tcg_gen_shri_vec(vece, t, a, sh);
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shr8_ins_i64, .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_b, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64, .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_h, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32, .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_s, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64, .fniv = gen_shr_ins_vec,
          .fno = gen_helper_gvec_sri_d, .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [1..esize]. */
    tcg_debug_assert(shift > 0);
    tcg_debug_assert(shift <= (8 << vece));

    /* Shift of esize leaves destination unchanged. */
    if (shift < (8 << vece)) {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    } else {
        /* Nop, but we do need to clear the tail. */
        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
    }
}

static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_16, 0xffff << shift);
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_shli_i64(t, a, shift);
    tcg_gen_andi_i64(t, t, mask);
    tcg_gen_andi_i64(d, d, ~mask);
    tcg_gen_or_i64(d, d, t);
}

static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}

static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}

static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_temp_new_vec_matching(d);

    tcg_gen_shli_vec(vece, t, a, sh);
    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
    tcg_gen_and_vec(vece, d, d, m);
    tcg_gen_or_vec(vece, d, d, t);
}

void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    const GVecGen2i ops[4] = {
        { .fni8 = gen_shl8_ins_i64, .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_b, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni8 = gen_shl16_ins_i64, .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_h, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_shl32_ins_i32, .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_s, .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_shl64_ins_i64, .fniv = gen_shl_ins_vec,
          .fno = gen_helper_gvec_sli_d, .load_dest = true,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };

    /* tszimm encoding produces immediates in the range [0..esize-1]. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift < (8 << vece));

    if (shift == 0) {
        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
    }
}

static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}

static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}

static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}

static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}

static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}

static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}

static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}

static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}

static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}

static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}

/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mla8_i32, .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni4 = gen_mla16_i32, .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_mla32_i32, .fniv = gen_mla_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_mla64_i64, .fniv = gen_mla_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_mls8_i32, .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni4 = gen_mls16_i32, .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_mls32_i32, .fniv = gen_mls_vec,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_mls64_i64, .fniv = gen_mls_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
}

void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
}

static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}

void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
    static const GVecGen3 ops[4] = {
        { .fni4 = gen_helper_neon_tst_u8, .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16, .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_cmtst_i32, .fniv = gen_cmtst_vec,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_cmtst_i64, .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(32);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_shr_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
}
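
/*
 * The shift operand's low byte is taken as a signed count: e.g. a byte
 * value of 0xfd (-3) selects a right shift by 3, while counts of 32 or
 * more in either direction produce zero via the two movcond selections
 * above.
 */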

void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(64);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_shr_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
}

static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}

void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_shlv_vec,
        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fniv = gen_ushl_vec,
          .fno = gen_helper_gvec_ushl_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_ushl_i32, .fniv = gen_ushl_vec,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_ushl_i64, .fniv = gen_ushl_vec,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
    TCGv_i32 lval = tcg_temp_new_i32();
    TCGv_i32 rval = tcg_temp_new_i32();
    TCGv_i32 lsh = tcg_temp_new_i32();
    TCGv_i32 rsh = tcg_temp_new_i32();
    TCGv_i32 zero = tcg_constant_i32(0);
    TCGv_i32 max = tcg_constant_i32(31);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i32(lsh, shift);
    tcg_gen_neg_i32(rsh, lsh);
    tcg_gen_shl_i32(lval, src, lsh);
    tcg_gen_umin_i32(rsh, rsh, max);
    tcg_gen_sar_i32(rval, src, rsh);
    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
{
    TCGv_i64 lval = tcg_temp_new_i64();
    TCGv_i64 rval = tcg_temp_new_i64();
    TCGv_i64 lsh = tcg_temp_new_i64();
    TCGv_i64 rsh = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 max = tcg_constant_i64(63);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_ext8s_i64(lsh, shift);
    tcg_gen_neg_i64(rsh, lsh);
    tcg_gen_shl_i64(lval, src, lsh);
    tcg_gen_umin_i64(rsh, rsh, max);
    tcg_gen_sar_i64(rval, src, rsh);
    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
}

static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1. */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift. */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift. */
    if (vece == MO_8) {
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}
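
/*
 * Note on the final selection above: for MO_8 the shift count is used
 * unmasked, so a signed compare against zero distinguishes left from
 * right shifts; for wider elements the count has already been masked
 * to [0, 0xff], so a value of 0x80 or more marks a negative (right)
 * shift count.
 */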

void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fniv = gen_sshl_vec,
          .fno = gen_helper_gvec_sshl_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_sshl_i32, .fniv = gen_sshl_vec,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_sshl_i64, .fniv = gen_sshl_vec,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h,
        gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[] = {
        gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h,
        gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h,
        gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3_ptr * const fns[] = {
        gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h,
        gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d,
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env,
                       opr_sz, max_sz, 0, fns[vece]);
}

void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_add_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a,
                        tcg_constant_i64(UINT64_MAX), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}
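
/*
 * For the 64-bit case above, unsigned overflow occurred exactly when the
 * raw sum is less than either operand, so the movcond saturates the result
 * to UINT64_MAX in that case; the XOR of the raw and saturated sums is then
 * nonzero precisely when saturation happened, which accumulates into QC.
 */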

static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_usadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_usadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_8 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_16 },
        { .fniv = gen_uqadd_vec,
          .fno = gen_helper_gvec_uqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_32 },
        { .fniv = gen_uqadd_vec, .fni8 = gen_uqadd_d,
          .fno = gen_helper_gvec_uqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_add_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_add_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_andc_i64(t1, t2, t1);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}
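
/*
 * In gen_sqadd_d above, T1 has its sign bit set exactly when the operands
 * share a sign but the sum's sign differs, i.e. on signed overflow; T2 is
 * the value to saturate to, INT64_MAX when A is non-negative and INT64_MIN
 * when A is negative.
 */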

static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_add_vec(vece, x, a, b);
    tcg_gen_ssadd_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ssadd_vec, INDEX_op_add_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_b,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_8 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_h,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_16 },
        { .fniv = gen_sqadd_vec,
          .fno = gen_helper_gvec_sqadd_s,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_32 },
        { .fniv = gen_sqadd_vec, .fni8 = gen_sqadd_d,
          .fno = gen_helper_gvec_sqadd_d,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t);
    tcg_gen_xor_i64(t, t, res);
    tcg_gen_or_i64(qc, qc, t);
}

static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_ussub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_ussub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_8 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_16 },
        { .fniv = gen_uqsub_vec,
          .fno = gen_helper_gvec_uqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_32 },
        { .fniv = gen_uqsub_vec, .fni8 = gen_uqsub_d,
          .fno = gen_helper_gvec_uqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    int64_t min = -1ll - max;
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_sub_i64(tmp, a, b);
    tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(min));
    tcg_gen_xor_i64(tmp, tmp, res);
    tcg_gen_or_i64(qc, qc, tmp);
}

void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t0, a, b);

    /* Compute signed overflow indication into T1 */
    tcg_gen_xor_i64(t1, a, b);
    tcg_gen_xor_i64(t2, t0, a);
    tcg_gen_and_i64(t1, t1, t2);

    /* Compute saturated value into T2 */
    tcg_gen_sari_i64(t2, a, 63);
    tcg_gen_xori_i64(t2, t2, INT64_MAX);

    tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0);
    tcg_gen_xor_i64(t0, t0, res);
    tcg_gen_or_i64(qc, qc, t0);
}

static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                          TCGv_vec a, TCGv_vec b)
{
    TCGv_vec x = tcg_temp_new_vec_matching(t);
    tcg_gen_sub_vec(vece, x, a, b);
    tcg_gen_sssub_vec(vece, t, a, b);
    tcg_gen_xor_vec(vece, x, x, t);
    tcg_gen_or_vec(vece, qc, qc, x);
}

void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sssub_vec, INDEX_op_sub_vec, 0
    };
    static const GVecGen4 ops[4] = {
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_b,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_8 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_h,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_16 },
        { .fniv = gen_sqsub_vec,
          .fno = gen_helper_gvec_sqsub_s,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_32 },
        { .fniv = gen_sqsub_vec, .fni8 = gen_sqsub_d,
          .fno = gen_helper_gvec_sqsub_d,
          .opt_opc = vecop_list,
          .write_aofs = true, .vece = MO_64 },
    };

    tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
}

static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_smin_vec(vece, t, a, b);
    tcg_gen_smax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_sabd_i32, .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_s,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_sabd_i64, .fniv = gen_sabd_vec,
          .fno = gen_helper_gvec_sabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sub_i32(t, a, b);
    tcg_gen_sub_i32(d, b, a);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_sub_i64(t, a, b);
    tcg_gen_sub_i64(d, b, a);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
}

static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_umin_vec(vece, t, a, b);
    tcg_gen_umax_vec(vece, d, a, b);
    tcg_gen_sub_vec(vece, d, d, t);
}

void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_b,
          .opt_opc = vecop_list, .vece = MO_8 },
        { .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_h,
          .opt_opc = vecop_list, .vece = MO_16 },
        { .fni4 = gen_uabd_i32, .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_s,
          .opt_opc = vecop_list, .vece = MO_32 },
        { .fni8 = gen_uabd_i64, .fniv = gen_uabd_vec,
          .fno = gen_helper_gvec_uabd_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_sabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_sabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_b,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_8 },
        { .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_h,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_16 },
        { .fni4 = gen_saba_i32, .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_s,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_32 },
        { .fni8 = gen_saba_i64, .fniv = gen_saba_vec,
          .fno = gen_helper_gvec_saba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_new_i32();
    gen_uabd_i32(t, a, b);
    tcg_gen_add_i32(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, a, b);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    gen_uabd_vec(vece, t, a, b);
    tcg_gen_add_vec(vece, d, d, t);
}

void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = {
        INDEX_op_sub_vec, INDEX_op_add_vec,
        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
    };
    static const GVecGen3 ops[4] = {
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_b,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_8 },
        { .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_h,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_16 },
        { .fni4 = gen_uaba_i32, .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_s,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_32 },
        { .fni8 = gen_uaba_i64, .fniv = gen_uaba_vec,
          .fno = gen_helper_gvec_uaba_d,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .opt_opc = vecop_list,
          .load_dest = true, .vece = MO_64 },
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
}

void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_addp_b,
        gen_helper_gvec_addp_h,
        gen_helper_gvec_addp_s,
        gen_helper_gvec_addp_d,
    };
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smaxp_b,
        gen_helper_gvec_smaxp_h,
        gen_helper_gvec_smaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_sminp_b,
        gen_helper_gvec_sminp_h,
        gen_helper_gvec_sminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umaxp_b,
        gen_helper_gvec_umaxp_h,
        gen_helper_gvec_umaxp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}

void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_uminp_b,
        gen_helper_gvec_uminp_h,
        gen_helper_gvec_uminp_s,
    };
    tcg_debug_assert(vece <= MO_32);
    tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]);
}