/*
 * Generic vector operation expansion
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-op-gvec-common.h"
#include "tcg/tcg-gvec-desc.h"
#ifdef CONFIG_DEBUG_TCG
static const TCGOpcode vecop_list_empty[1] = { 0 };
#else
#define vecop_list_empty NULL
#endif
/* Verify vector size and alignment rules.  OFS should be the OR of all
   of the operand offsets so that we can check them all at once.  */
static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
{
    uint32_t max_align;

    switch (oprsz) {
    case 8:
    case 16:
    case 32:
        tcg_debug_assert(oprsz <= maxsz);
        break;
    default:
        tcg_debug_assert(oprsz == maxsz);
        break;
    }
    tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));

    max_align = maxsz >= 16 ? 15 : 7;
    tcg_debug_assert((maxsz & max_align) == 0);
    tcg_debug_assert((ofs & max_align) == 0);
}
/* Verify vector overlap rules for two operands.  */
static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
{
    tcg_debug_assert(d == a || d + s <= a || a + s <= d);
}
/* Verify vector overlap rules for three operands.  */
static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
{
    check_overlap_2(d, a, s);
    check_overlap_2(d, b, s);
    check_overlap_2(a, b, s);
}
/* Verify vector overlap rules for four operands.  */
static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
                            uint32_t c, uint32_t s)
{
    check_overlap_2(d, a, s);
    check_overlap_2(d, b, s);
    check_overlap_2(d, c, s);
    check_overlap_2(a, b, s);
    check_overlap_2(a, c, s);
    check_overlap_2(b, c, s);
}
/* Create a descriptor from components.  */
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
{
    uint32_t desc = 0;

    check_size_align(oprsz, maxsz, 0);
    tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));

    oprsz = (oprsz / 8) - 1;
    maxsz = (maxsz / 8) - 1;

    /*
     * We have just asserted in check_size_align that either
     * oprsz is {8, 16, 32} or matches maxsz.  Encode the final
     * case with '2', as that would otherwise map to 24.
     */
    if (oprsz == maxsz) {
        oprsz = 2;
    }

    desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
    desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
    desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);

    return desc;
}
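
/*
 * Illustrative sketch (not part of the original source): a gvec
 * out-of-line helper recovers the encoded fields with the accessors
 * from tcg-gvec-desc.h.  The helper below is hypothetical; it adds
 * the caller-supplied immediate to each 64-bit lane and zeroes the
 * tail bytes between oprsz and maxsz.
 *
 *   void HELPER(gvec_example_addi64)(void *d, void *a, uint32_t desc)
 *   {
 *       intptr_t oprsz = simd_oprsz(desc);   // bytes to operate on
 *       intptr_t maxsz = simd_maxsz(desc);   // total bytes in the register
 *       int32_t data = simd_data(desc);      // caller-supplied immediate
 *       intptr_t i;
 *
 *       for (i = 0; i < oprsz; i += 8) {
 *           *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + data;
 *       }
 *       for (; i < maxsz; i += 8) {
 *           *(uint64_t *)(d + i) = 0;
 *       }
 *   }
 */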
/* Generate a call to a gvec-style helper with two vector operands.  */
void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
                        uint32_t oprsz, uint32_t maxsz, int32_t data,
                        gen_helper_gvec_2 *fn)
{
    TCGv_ptr a0, a1;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);

    fn(a0, a1, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
}
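
/*
 * Usage sketch (illustrative only): a target front end with a helper
 * declared as
 *   DEF_HELPER_FLAGS_3(example_op, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 * could expand a whole-register operation on two 16-byte registers as
 *
 *   tcg_gen_gvec_2_ool(dofs, aofs, 16, 16, 0, gen_helper_example_op);
 *
 * where dofs/aofs are env offsets of the registers and the helper
 * name is hypothetical.
 */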
/* Generate a call to a gvec-style helper with two vector operands
   and one scalar operand.  */
void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
                         uint32_t oprsz, uint32_t maxsz, int32_t data,
                         gen_helper_gvec_2i *fn)
{
    TCGv_ptr a0, a1;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);

    fn(a0, a1, c, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
}
/* Generate a call to a gvec-style helper with three vector operands.  */
void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        uint32_t oprsz, uint32_t maxsz, int32_t data,
                        gen_helper_gvec_3 *fn)
{
    TCGv_ptr a0, a1, a2;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);

    fn(a0, a1, a2, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
}
/* Generate a call to a gvec-style helper with four vector operands.  */
void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
                        int32_t data, gen_helper_gvec_4 *fn)
{
    TCGv_ptr a0, a1, a2, a3;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();
    a3 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);
    tcg_gen_addi_ptr(a3, tcg_env, cofs);

    fn(a0, a1, a2, a3, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
    tcg_temp_free_ptr(a3);
}
/* Generate a call to a gvec-style helper with five vector operands.  */
void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        uint32_t cofs, uint32_t xofs, uint32_t oprsz,
                        uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
{
    TCGv_ptr a0, a1, a2, a3, a4;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();
    a3 = tcg_temp_ebb_new_ptr();
    a4 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);
    tcg_gen_addi_ptr(a3, tcg_env, cofs);
    tcg_gen_addi_ptr(a4, tcg_env, xofs);

    fn(a0, a1, a2, a3, a4, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
    tcg_temp_free_ptr(a3);
    tcg_temp_free_ptr(a4);
}
/* Generate a call to a gvec-style helper with two vector operands
   and an extra pointer operand.  */
void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
                        TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
                        int32_t data, gen_helper_gvec_2_ptr *fn)
{
    TCGv_ptr a0, a1;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);

    fn(a0, a1, ptr, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
}
/* Generate a call to a gvec-style helper with three vector operands
   and an extra pointer operand.  */
void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
                        int32_t data, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr a0, a1, a2;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);

    fn(a0, a1, a2, ptr, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
}
/* Generate a call to a gvec-style helper with four vector operands
   and an extra pointer operand.  */
void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
                        uint32_t maxsz, int32_t data,
                        gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr a0, a1, a2, a3;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();
    a3 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);
    tcg_gen_addi_ptr(a3, tcg_env, cofs);

    fn(a0, a1, a2, a3, ptr, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
    tcg_temp_free_ptr(a3);
}
/* Generate a call to a gvec-style helper with five vector operands
   and an extra pointer operand.  */
void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                        uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
                        uint32_t oprsz, uint32_t maxsz, int32_t data,
                        gen_helper_gvec_5_ptr *fn)
{
    TCGv_ptr a0, a1, a2, a3, a4;
    TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));

    a0 = tcg_temp_ebb_new_ptr();
    a1 = tcg_temp_ebb_new_ptr();
    a2 = tcg_temp_ebb_new_ptr();
    a3 = tcg_temp_ebb_new_ptr();
    a4 = tcg_temp_ebb_new_ptr();

    tcg_gen_addi_ptr(a0, tcg_env, dofs);
    tcg_gen_addi_ptr(a1, tcg_env, aofs);
    tcg_gen_addi_ptr(a2, tcg_env, bofs);
    tcg_gen_addi_ptr(a3, tcg_env, cofs);
    tcg_gen_addi_ptr(a4, tcg_env, eofs);

    fn(a0, a1, a2, a3, a4, ptr, desc);

    tcg_temp_free_ptr(a0);
    tcg_temp_free_ptr(a1);
    tcg_temp_free_ptr(a2);
    tcg_temp_free_ptr(a3);
    tcg_temp_free_ptr(a4);
}
/* Return true if we want to implement something of OPRSZ bytes
   in units of LNSZ.  This limits the expansion of inline code.  */
static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
{
    uint32_t q, r;

    if (oprsz < lnsz) {
        return false;
    }

    q = oprsz / lnsz;
    r = oprsz % lnsz;
    tcg_debug_assert((r & 7) == 0);

    if (lnsz < 16) {
        /* For sizes below 16, accept no remainder. */
        if (r != 0) {
            return false;
        }
    } else {
        /*
         * Recall that ARM SVE allows vector sizes that are not a
         * power of 2, but always a multiple of 16.  The intent is
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         * In addition, expand_clr needs to handle a multiple of 8.
         * Thus we can handle the tail with one more operation per
         * diminishing power of 2.
         */
        q += ctpop32(r);
    }

    return q <= MAX_UNROLL;
}
static void expand_clr(uint32_t dofs, uint32_t maxsz);

/* Duplicate C as per VECE. */
uint64_t (dup_const)(unsigned vece, uint64_t c)
{
    switch (vece) {
    case MO_8:
        return 0x0101010101010101ull * (uint8_t)c;
    case MO_16:
        return 0x0001000100010001ull * (uint16_t)c;
    case MO_32:
        return 0x0000000100000001ull * (uint32_t)c;
    case MO_64:
        return c;
    default:
        g_assert_not_reached();
    }
}
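
/*
 * For example (illustrative, following directly from the code above):
 *   dup_const(MO_8,  0xab)       == 0xabababababababab
 *   dup_const(MO_16, 0x1234)     == 0x1234123412341234
 *   dup_const(MO_32, 0xdeadbeef) == 0xdeadbeefdeadbeef
 * The parenthesized function name keeps this out-of-line definition
 * from being expanded by the dup_const() macro used for compile-time
 * constant arguments.
 */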
/* Duplicate IN into OUT as per VECE. */
void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in)
{
    switch (vece) {
    case MO_8:
        tcg_gen_ext8u_i32(out, in);
        tcg_gen_muli_i32(out, out, 0x01010101);
        break;
    case MO_16:
        tcg_gen_deposit_i32(out, in, in, 16, 16);
        break;
    case MO_32:
        tcg_gen_mov_i32(out, in);
        break;
    default:
        g_assert_not_reached();
    }
}

void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
{
    switch (vece) {
    case MO_8:
        tcg_gen_ext8u_i64(out, in);
        tcg_gen_muli_i64(out, out, 0x0101010101010101ull);
        break;
    case MO_16:
        tcg_gen_ext16u_i64(out, in);
        tcg_gen_muli_i64(out, out, 0x0001000100010001ull);
        break;
    case MO_32:
        tcg_gen_deposit_i64(out, in, in, 32, 32);
        break;
    case MO_64:
        tcg_gen_mov_i64(out, in);
        break;
    default:
        g_assert_not_reached();
    }
}
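
/*
 * Example (illustrative): with VECE == MO_8 and IN holding 0x5a,
 * tcg_gen_dup_i32 leaves OUT holding 0x5a5a5a5a; the i64 variant
 * likewise replicates the low element across all eight bytes.
 */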
/* Select a supported vector type for implementing an operation on SIZE
 * bytes.  If OP is 0, assume that the real operation to be performed is
 * required by all backends.  Otherwise, make sure that OP can be performed
 * on elements of size VECE in the selected type.  Do not select V64 if
 * PREFER_I64 is true.  Return 0 if no vector type is selected.
 */
static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
                                  uint32_t size, bool prefer_i64)
{
    /*
     * Recall that ARM SVE allows vector sizes that are not a
     * power of 2, but always a multiple of 16.  The intent is
     * that e.g. size == 80 would be expanded with 2x32 + 1x16.
     * It is hard to imagine a case in which v256 is supported
     * but v128 is not, but check anyway.
     * In addition, expand_clr needs to handle a multiple of 8.
     */
    if (TCG_TARGET_HAS_v256 &&
        check_size_impl(size, 32) &&
        tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
        (!(size & 16) ||
         (TCG_TARGET_HAS_v128 &&
          tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
        (!(size & 8) ||
         (TCG_TARGET_HAS_v64 &&
          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
        return TCG_TYPE_V256;
    }
    if (TCG_TARGET_HAS_v128 &&
        check_size_impl(size, 16) &&
        tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
        (!(size & 8) ||
         (TCG_TARGET_HAS_v64 &&
          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
        return TCG_TYPE_V128;
    }
    if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
        && tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)) {
        return TCG_TYPE_V64;
    }
    return 0;
}
static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
                         uint32_t maxsz, TCGv_vec t_vec)
{
    uint32_t i = 0;

    tcg_debug_assert(oprsz >= 8);

    /*
     * This may be expand_clr for the tail of an operation, e.g.
     * oprsz == 8 && maxsz == 64.  The first 8 bytes of this store
     * are misaligned wrt the maximum vector size, so do that first.
     */
    if (dofs & 8) {
        tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64);
        i += 8;
    }

    switch (type) {
    case TCG_TYPE_V256:
        /*
         * Recall that ARM SVE allows vector sizes that are not a
         * power of 2, but always a multiple of 16.  The intent is
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         */
        for (; i + 32 <= oprsz; i += 32) {
            tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V256);
        }
        /* fallthru */
    case TCG_TYPE_V128:
        for (; i + 16 <= oprsz; i += 16) {
            tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V128);
        }
        break;
    case TCG_TYPE_V64:
        for (; i < oprsz; i += 8) {
            tcg_gen_stl_vec(t_vec, tcg_env, dofs + i, TCG_TYPE_V64);
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);
    }
}
519 /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
520 * Only one of IN_32 or IN_64 may be set;
521 * IN_C is used if IN_32 and IN_64 are unset.
523 static void do_dup(unsigned vece
, uint32_t dofs
, uint32_t oprsz
,
524 uint32_t maxsz
, TCGv_i32 in_32
, TCGv_i64 in_64
,
529 TCGv_i32 t_32
, t_desc
;
533 assert(vece
<= (in_32
? MO_32
: MO_64
));
534 assert(in_32
== NULL
|| in_64
== NULL
);
536 /* If we're storing 0, expand oprsz to maxsz. */
537 if (in_32
== NULL
&& in_64
== NULL
) {
538 in_c
= dup_const(vece
, in_c
);
542 } else if (in_c
== dup_const(MO_8
, in_c
)) {
547 /* Implement inline with a vector type, if possible.
548 * Prefer integer when 64-bit host and no variable dup.
550 type
= choose_vector_type(NULL
, vece
, oprsz
,
551 (TCG_TARGET_REG_BITS
== 64 && in_32
== NULL
552 && (in_64
== NULL
|| vece
== MO_64
)));
554 TCGv_vec t_vec
= tcg_temp_new_vec(type
);
557 tcg_gen_dup_i32_vec(vece
, t_vec
, in_32
);
559 tcg_gen_dup_i64_vec(vece
, t_vec
, in_64
);
561 tcg_gen_dupi_vec(vece
, t_vec
, in_c
);
563 do_dup_store(type
, dofs
, oprsz
, maxsz
, t_vec
);
567 /* Otherwise, inline with an integer type, unless "large". */
568 if (check_size_impl(oprsz
, TCG_TARGET_REG_BITS
/ 8)) {
573 /* We are given a 32-bit variable input. For a 64-bit host,
574 use a 64-bit operation unless the 32-bit operation would
576 if (TCG_TARGET_REG_BITS
== 64
577 && (vece
!= MO_32
|| !check_size_impl(oprsz
, 4))) {
578 t_64
= tcg_temp_ebb_new_i64();
579 tcg_gen_extu_i32_i64(t_64
, in_32
);
580 tcg_gen_dup_i64(vece
, t_64
, t_64
);
582 t_32
= tcg_temp_ebb_new_i32();
583 tcg_gen_dup_i32(vece
, t_32
, in_32
);
586 /* We are given a 64-bit variable input. */
587 t_64
= tcg_temp_ebb_new_i64();
588 tcg_gen_dup_i64(vece
, t_64
, in_64
);
590 /* We are given a constant input. */
591 /* For 64-bit hosts, use 64-bit constants for "simple" constants
592 or when we'd need too many 32-bit stores, or when a 64-bit
593 constant is really required. */
595 || (TCG_TARGET_REG_BITS
== 64
596 && (in_c
== 0 || in_c
== -1
597 || !check_size_impl(oprsz
, 4)))) {
598 t_64
= tcg_constant_i64(in_c
);
600 t_32
= tcg_constant_i32(in_c
);
604 /* Implement inline if we picked an implementation size above. */
606 for (i
= 0; i
< oprsz
; i
+= 4) {
607 tcg_gen_st_i32(t_32
, tcg_env
, dofs
+ i
);
609 tcg_temp_free_i32(t_32
);
613 for (i
= 0; i
< oprsz
; i
+= 8) {
614 tcg_gen_st_i64(t_64
, tcg_env
, dofs
+ i
);
616 tcg_temp_free_i64(t_64
);
621 /* Otherwise implement out of line. */
622 t_ptr
= tcg_temp_ebb_new_ptr();
623 tcg_gen_addi_ptr(t_ptr
, tcg_env
, dofs
);
626 * This may be expand_clr for the tail of an operation, e.g.
627 * oprsz == 8 && maxsz == 64. The size of the clear is misaligned
628 * wrt simd_desc and will assert. Simply pass all replicated byte
629 * stores through to memset.
631 if (oprsz
== maxsz
&& vece
== MO_8
) {
632 TCGv_ptr t_size
= tcg_constant_ptr(oprsz
);
638 t_val
= tcg_temp_ebb_new_i32();
639 tcg_gen_extrl_i64_i32(t_val
, in_64
);
641 t_val
= tcg_constant_i32(in_c
);
643 gen_helper_memset(t_ptr
, t_ptr
, t_val
, t_size
);
646 tcg_temp_free_i32(t_val
);
648 tcg_temp_free_ptr(t_ptr
);
652 t_desc
= tcg_constant_i32(simd_desc(oprsz
, maxsz
, 0));
656 gen_helper_gvec_dup64(t_ptr
, t_desc
, in_64
);
658 t_64
= tcg_constant_i64(in_c
);
659 gen_helper_gvec_dup64(t_ptr
, t_desc
, t_64
);
662 typedef void dup_fn(TCGv_ptr
, TCGv_i32
, TCGv_i32
);
663 static dup_fn
* const fns
[3] = {
664 gen_helper_gvec_dup8
,
665 gen_helper_gvec_dup16
,
666 gen_helper_gvec_dup32
670 fns
[vece
](t_ptr
, t_desc
, in_32
);
672 t_32
= tcg_temp_ebb_new_i32();
673 tcg_gen_extrl_i64_i32(t_32
, in_64
);
674 fns
[vece
](t_ptr
, t_desc
, t_32
);
675 tcg_temp_free_i32(t_32
);
679 } else if (vece
== MO_16
) {
682 t_32
= tcg_constant_i32(in_c
);
683 fns
[vece
](t_ptr
, t_desc
, t_32
);
687 tcg_temp_free_ptr(t_ptr
);
692 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
696 /* Likewise, but with zero. */
697 static void expand_clr(uint32_t dofs
, uint32_t maxsz
)
699 do_dup(MO_8
, dofs
, maxsz
, maxsz
, NULL
, NULL
, 0);
702 /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
703 static void expand_2_i32(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
704 bool load_dest
, void (*fni
)(TCGv_i32
, TCGv_i32
))
706 TCGv_i32 t0
= tcg_temp_new_i32();
707 TCGv_i32 t1
= tcg_temp_new_i32();
710 for (i
= 0; i
< oprsz
; i
+= 4) {
711 tcg_gen_ld_i32(t0
, tcg_env
, aofs
+ i
);
713 tcg_gen_ld_i32(t1
, tcg_env
, dofs
+ i
);
716 tcg_gen_st_i32(t1
, tcg_env
, dofs
+ i
);
718 tcg_temp_free_i32(t0
);
719 tcg_temp_free_i32(t1
);
722 static void expand_2i_i32(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
723 int32_t c
, bool load_dest
,
724 void (*fni
)(TCGv_i32
, TCGv_i32
, int32_t))
726 TCGv_i32 t0
= tcg_temp_new_i32();
727 TCGv_i32 t1
= tcg_temp_new_i32();
730 for (i
= 0; i
< oprsz
; i
+= 4) {
731 tcg_gen_ld_i32(t0
, tcg_env
, aofs
+ i
);
733 tcg_gen_ld_i32(t1
, tcg_env
, dofs
+ i
);
736 tcg_gen_st_i32(t1
, tcg_env
, dofs
+ i
);
738 tcg_temp_free_i32(t0
);
739 tcg_temp_free_i32(t1
);
742 static void expand_2s_i32(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
743 TCGv_i32 c
, bool scalar_first
,
744 void (*fni
)(TCGv_i32
, TCGv_i32
, TCGv_i32
))
746 TCGv_i32 t0
= tcg_temp_new_i32();
747 TCGv_i32 t1
= tcg_temp_new_i32();
750 for (i
= 0; i
< oprsz
; i
+= 4) {
751 tcg_gen_ld_i32(t0
, tcg_env
, aofs
+ i
);
757 tcg_gen_st_i32(t1
, tcg_env
, dofs
+ i
);
759 tcg_temp_free_i32(t0
);
760 tcg_temp_free_i32(t1
);
763 /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
764 static void expand_3_i32(uint32_t dofs
, uint32_t aofs
,
765 uint32_t bofs
, uint32_t oprsz
, bool load_dest
,
766 void (*fni
)(TCGv_i32
, TCGv_i32
, TCGv_i32
))
768 TCGv_i32 t0
= tcg_temp_new_i32();
769 TCGv_i32 t1
= tcg_temp_new_i32();
770 TCGv_i32 t2
= tcg_temp_new_i32();
773 for (i
= 0; i
< oprsz
; i
+= 4) {
774 tcg_gen_ld_i32(t0
, tcg_env
, aofs
+ i
);
775 tcg_gen_ld_i32(t1
, tcg_env
, bofs
+ i
);
777 tcg_gen_ld_i32(t2
, tcg_env
, dofs
+ i
);
780 tcg_gen_st_i32(t2
, tcg_env
, dofs
+ i
);
782 tcg_temp_free_i32(t2
);
783 tcg_temp_free_i32(t1
);
784 tcg_temp_free_i32(t0
);
787 static void expand_3i_i32(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
788 uint32_t oprsz
, int32_t c
, bool load_dest
,
789 void (*fni
)(TCGv_i32
, TCGv_i32
, TCGv_i32
, int32_t))
791 TCGv_i32 t0
= tcg_temp_new_i32();
792 TCGv_i32 t1
= tcg_temp_new_i32();
793 TCGv_i32 t2
= tcg_temp_new_i32();
796 for (i
= 0; i
< oprsz
; i
+= 4) {
797 tcg_gen_ld_i32(t0
, tcg_env
, aofs
+ i
);
798 tcg_gen_ld_i32(t1
, tcg_env
, bofs
+ i
);
800 tcg_gen_ld_i32(t2
, tcg_env
, dofs
+ i
);
803 tcg_gen_st_i32(t2
, tcg_env
, dofs
+ i
);
805 tcg_temp_free_i32(t0
);
806 tcg_temp_free_i32(t1
);
807 tcg_temp_free_i32(t2
);
810 /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
811 static void expand_4_i32(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
812 uint32_t cofs
, uint32_t oprsz
, bool write_aofs
,
813 void (*fni
)(TCGv_i32
, TCGv_i32
, TCGv_i32
, TCGv_i32
))
815 TCGv_i32 t0
= tcg_temp_new_i32();
816 TCGv_i32 t1
= tcg_temp_new_i32();
817 TCGv_i32 t2
= tcg_temp_new_i32();
818 TCGv_i32 t3
= tcg_temp_new_i32();
821 for (i
= 0; i
< oprsz
; i
+= 4) {
822 tcg_gen_ld_i32(t1
, tcg_env
, aofs
+ i
);
823 tcg_gen_ld_i32(t2
, tcg_env
, bofs
+ i
);
824 tcg_gen_ld_i32(t3
, tcg_env
, cofs
+ i
);
826 tcg_gen_st_i32(t0
, tcg_env
, dofs
+ i
);
828 tcg_gen_st_i32(t1
, tcg_env
, aofs
+ i
);
831 tcg_temp_free_i32(t3
);
832 tcg_temp_free_i32(t2
);
833 tcg_temp_free_i32(t1
);
834 tcg_temp_free_i32(t0
);
837 static void expand_4i_i32(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
838 uint32_t cofs
, uint32_t oprsz
, int32_t c
,
839 void (*fni
)(TCGv_i32
, TCGv_i32
, TCGv_i32
, TCGv_i32
,
842 TCGv_i32 t0
= tcg_temp_new_i32();
843 TCGv_i32 t1
= tcg_temp_new_i32();
844 TCGv_i32 t2
= tcg_temp_new_i32();
845 TCGv_i32 t3
= tcg_temp_new_i32();
848 for (i
= 0; i
< oprsz
; i
+= 4) {
849 tcg_gen_ld_i32(t1
, tcg_env
, aofs
+ i
);
850 tcg_gen_ld_i32(t2
, tcg_env
, bofs
+ i
);
851 tcg_gen_ld_i32(t3
, tcg_env
, cofs
+ i
);
852 fni(t0
, t1
, t2
, t3
, c
);
853 tcg_gen_st_i32(t0
, tcg_env
, dofs
+ i
);
855 tcg_temp_free_i32(t3
);
856 tcg_temp_free_i32(t2
);
857 tcg_temp_free_i32(t1
);
858 tcg_temp_free_i32(t0
);
861 /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
862 static void expand_2_i64(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
863 bool load_dest
, void (*fni
)(TCGv_i64
, TCGv_i64
))
865 TCGv_i64 t0
= tcg_temp_new_i64();
866 TCGv_i64 t1
= tcg_temp_new_i64();
869 for (i
= 0; i
< oprsz
; i
+= 8) {
870 tcg_gen_ld_i64(t0
, tcg_env
, aofs
+ i
);
872 tcg_gen_ld_i64(t1
, tcg_env
, dofs
+ i
);
875 tcg_gen_st_i64(t1
, tcg_env
, dofs
+ i
);
877 tcg_temp_free_i64(t0
);
878 tcg_temp_free_i64(t1
);
881 static void expand_2i_i64(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
882 int64_t c
, bool load_dest
,
883 void (*fni
)(TCGv_i64
, TCGv_i64
, int64_t))
885 TCGv_i64 t0
= tcg_temp_new_i64();
886 TCGv_i64 t1
= tcg_temp_new_i64();
889 for (i
= 0; i
< oprsz
; i
+= 8) {
890 tcg_gen_ld_i64(t0
, tcg_env
, aofs
+ i
);
892 tcg_gen_ld_i64(t1
, tcg_env
, dofs
+ i
);
895 tcg_gen_st_i64(t1
, tcg_env
, dofs
+ i
);
897 tcg_temp_free_i64(t0
);
898 tcg_temp_free_i64(t1
);
901 static void expand_2s_i64(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
902 TCGv_i64 c
, bool scalar_first
,
903 void (*fni
)(TCGv_i64
, TCGv_i64
, TCGv_i64
))
905 TCGv_i64 t0
= tcg_temp_new_i64();
906 TCGv_i64 t1
= tcg_temp_new_i64();
909 for (i
= 0; i
< oprsz
; i
+= 8) {
910 tcg_gen_ld_i64(t0
, tcg_env
, aofs
+ i
);
916 tcg_gen_st_i64(t1
, tcg_env
, dofs
+ i
);
918 tcg_temp_free_i64(t0
);
919 tcg_temp_free_i64(t1
);
922 /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
923 static void expand_3_i64(uint32_t dofs
, uint32_t aofs
,
924 uint32_t bofs
, uint32_t oprsz
, bool load_dest
,
925 void (*fni
)(TCGv_i64
, TCGv_i64
, TCGv_i64
))
927 TCGv_i64 t0
= tcg_temp_new_i64();
928 TCGv_i64 t1
= tcg_temp_new_i64();
929 TCGv_i64 t2
= tcg_temp_new_i64();
932 for (i
= 0; i
< oprsz
; i
+= 8) {
933 tcg_gen_ld_i64(t0
, tcg_env
, aofs
+ i
);
934 tcg_gen_ld_i64(t1
, tcg_env
, bofs
+ i
);
936 tcg_gen_ld_i64(t2
, tcg_env
, dofs
+ i
);
939 tcg_gen_st_i64(t2
, tcg_env
, dofs
+ i
);
941 tcg_temp_free_i64(t2
);
942 tcg_temp_free_i64(t1
);
943 tcg_temp_free_i64(t0
);
946 static void expand_3i_i64(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
947 uint32_t oprsz
, int64_t c
, bool load_dest
,
948 void (*fni
)(TCGv_i64
, TCGv_i64
, TCGv_i64
, int64_t))
950 TCGv_i64 t0
= tcg_temp_new_i64();
951 TCGv_i64 t1
= tcg_temp_new_i64();
952 TCGv_i64 t2
= tcg_temp_new_i64();
955 for (i
= 0; i
< oprsz
; i
+= 8) {
956 tcg_gen_ld_i64(t0
, tcg_env
, aofs
+ i
);
957 tcg_gen_ld_i64(t1
, tcg_env
, bofs
+ i
);
959 tcg_gen_ld_i64(t2
, tcg_env
, dofs
+ i
);
962 tcg_gen_st_i64(t2
, tcg_env
, dofs
+ i
);
964 tcg_temp_free_i64(t0
);
965 tcg_temp_free_i64(t1
);
966 tcg_temp_free_i64(t2
);
969 /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
970 static void expand_4_i64(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
971 uint32_t cofs
, uint32_t oprsz
, bool write_aofs
,
972 void (*fni
)(TCGv_i64
, TCGv_i64
, TCGv_i64
, TCGv_i64
))
974 TCGv_i64 t0
= tcg_temp_new_i64();
975 TCGv_i64 t1
= tcg_temp_new_i64();
976 TCGv_i64 t2
= tcg_temp_new_i64();
977 TCGv_i64 t3
= tcg_temp_new_i64();
980 for (i
= 0; i
< oprsz
; i
+= 8) {
981 tcg_gen_ld_i64(t1
, tcg_env
, aofs
+ i
);
982 tcg_gen_ld_i64(t2
, tcg_env
, bofs
+ i
);
983 tcg_gen_ld_i64(t3
, tcg_env
, cofs
+ i
);
985 tcg_gen_st_i64(t0
, tcg_env
, dofs
+ i
);
987 tcg_gen_st_i64(t1
, tcg_env
, aofs
+ i
);
990 tcg_temp_free_i64(t3
);
991 tcg_temp_free_i64(t2
);
992 tcg_temp_free_i64(t1
);
993 tcg_temp_free_i64(t0
);
996 static void expand_4i_i64(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
997 uint32_t cofs
, uint32_t oprsz
, int64_t c
,
998 void (*fni
)(TCGv_i64
, TCGv_i64
, TCGv_i64
, TCGv_i64
,
1001 TCGv_i64 t0
= tcg_temp_new_i64();
1002 TCGv_i64 t1
= tcg_temp_new_i64();
1003 TCGv_i64 t2
= tcg_temp_new_i64();
1004 TCGv_i64 t3
= tcg_temp_new_i64();
1007 for (i
= 0; i
< oprsz
; i
+= 8) {
1008 tcg_gen_ld_i64(t1
, tcg_env
, aofs
+ i
);
1009 tcg_gen_ld_i64(t2
, tcg_env
, bofs
+ i
);
1010 tcg_gen_ld_i64(t3
, tcg_env
, cofs
+ i
);
1011 fni(t0
, t1
, t2
, t3
, c
);
1012 tcg_gen_st_i64(t0
, tcg_env
, dofs
+ i
);
1014 tcg_temp_free_i64(t3
);
1015 tcg_temp_free_i64(t2
);
1016 tcg_temp_free_i64(t1
);
1017 tcg_temp_free_i64(t0
);
1020 /* Expand OPSZ bytes worth of two-operand operations using host vectors. */
1021 static void expand_2_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1022 uint32_t oprsz
, uint32_t tysz
, TCGType type
,
1024 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
))
1026 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1027 TCGv_vec t0
= tcg_temp_new_vec(type
);
1028 TCGv_vec t1
= tcg_temp_new_vec(type
);
1030 tcg_gen_ld_vec(t0
, tcg_env
, aofs
+ i
);
1032 tcg_gen_ld_vec(t1
, tcg_env
, dofs
+ i
);
1035 tcg_gen_st_vec(t1
, tcg_env
, dofs
+ i
);
1039 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
1040 using host vectors. */
1041 static void expand_2i_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1042 uint32_t oprsz
, uint32_t tysz
, TCGType type
,
1043 int64_t c
, bool load_dest
,
1044 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
, int64_t))
1046 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1047 TCGv_vec t0
= tcg_temp_new_vec(type
);
1048 TCGv_vec t1
= tcg_temp_new_vec(type
);
1050 tcg_gen_ld_vec(t0
, tcg_env
, aofs
+ i
);
1052 tcg_gen_ld_vec(t1
, tcg_env
, dofs
+ i
);
1054 fni(vece
, t1
, t0
, c
);
1055 tcg_gen_st_vec(t1
, tcg_env
, dofs
+ i
);
1059 static void expand_2s_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1060 uint32_t oprsz
, uint32_t tysz
, TCGType type
,
1061 TCGv_vec c
, bool scalar_first
,
1062 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
, TCGv_vec
))
1064 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1065 TCGv_vec t0
= tcg_temp_new_vec(type
);
1066 TCGv_vec t1
= tcg_temp_new_vec(type
);
1068 tcg_gen_ld_vec(t0
, tcg_env
, aofs
+ i
);
1070 fni(vece
, t1
, c
, t0
);
1072 fni(vece
, t1
, t0
, c
);
1074 tcg_gen_st_vec(t1
, tcg_env
, dofs
+ i
);
1078 /* Expand OPSZ bytes worth of three-operand operations using host vectors. */
1079 static void expand_3_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1080 uint32_t bofs
, uint32_t oprsz
,
1081 uint32_t tysz
, TCGType type
, bool load_dest
,
1082 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
, TCGv_vec
))
1084 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1085 TCGv_vec t0
= tcg_temp_new_vec(type
);
1086 TCGv_vec t1
= tcg_temp_new_vec(type
);
1087 TCGv_vec t2
= tcg_temp_new_vec(type
);
1089 tcg_gen_ld_vec(t0
, tcg_env
, aofs
+ i
);
1090 tcg_gen_ld_vec(t1
, tcg_env
, bofs
+ i
);
1092 tcg_gen_ld_vec(t2
, tcg_env
, dofs
+ i
);
1094 fni(vece
, t2
, t0
, t1
);
1095 tcg_gen_st_vec(t2
, tcg_env
, dofs
+ i
);
1100 * Expand OPSZ bytes worth of three-vector operands and an immediate operand
1101 * using host vectors.
1103 static void expand_3i_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1104 uint32_t bofs
, uint32_t oprsz
, uint32_t tysz
,
1105 TCGType type
, int64_t c
, bool load_dest
,
1106 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
, TCGv_vec
,
1109 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1110 TCGv_vec t0
= tcg_temp_new_vec(type
);
1111 TCGv_vec t1
= tcg_temp_new_vec(type
);
1112 TCGv_vec t2
= tcg_temp_new_vec(type
);
1114 tcg_gen_ld_vec(t0
, tcg_env
, aofs
+ i
);
1115 tcg_gen_ld_vec(t1
, tcg_env
, bofs
+ i
);
1117 tcg_gen_ld_vec(t2
, tcg_env
, dofs
+ i
);
1119 fni(vece
, t2
, t0
, t1
, c
);
1120 tcg_gen_st_vec(t2
, tcg_env
, dofs
+ i
);
1124 /* Expand OPSZ bytes worth of four-operand operations using host vectors. */
1125 static void expand_4_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1126 uint32_t bofs
, uint32_t cofs
, uint32_t oprsz
,
1127 uint32_t tysz
, TCGType type
, bool write_aofs
,
1128 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
,
1129 TCGv_vec
, TCGv_vec
))
1131 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1132 TCGv_vec t0
= tcg_temp_new_vec(type
);
1133 TCGv_vec t1
= tcg_temp_new_vec(type
);
1134 TCGv_vec t2
= tcg_temp_new_vec(type
);
1135 TCGv_vec t3
= tcg_temp_new_vec(type
);
1137 tcg_gen_ld_vec(t1
, tcg_env
, aofs
+ i
);
1138 tcg_gen_ld_vec(t2
, tcg_env
, bofs
+ i
);
1139 tcg_gen_ld_vec(t3
, tcg_env
, cofs
+ i
);
1140 fni(vece
, t0
, t1
, t2
, t3
);
1141 tcg_gen_st_vec(t0
, tcg_env
, dofs
+ i
);
1143 tcg_gen_st_vec(t1
, tcg_env
, aofs
+ i
);
1149 * Expand OPSZ bytes worth of four-vector operands and an immediate operand
1150 * using host vectors.
1152 static void expand_4i_vec(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1153 uint32_t bofs
, uint32_t cofs
, uint32_t oprsz
,
1154 uint32_t tysz
, TCGType type
, int64_t c
,
1155 void (*fni
)(unsigned, TCGv_vec
, TCGv_vec
,
1156 TCGv_vec
, TCGv_vec
, int64_t))
1158 for (uint32_t i
= 0; i
< oprsz
; i
+= tysz
) {
1159 TCGv_vec t0
= tcg_temp_new_vec(type
);
1160 TCGv_vec t1
= tcg_temp_new_vec(type
);
1161 TCGv_vec t2
= tcg_temp_new_vec(type
);
1162 TCGv_vec t3
= tcg_temp_new_vec(type
);
1164 tcg_gen_ld_vec(t1
, tcg_env
, aofs
+ i
);
1165 tcg_gen_ld_vec(t2
, tcg_env
, bofs
+ i
);
1166 tcg_gen_ld_vec(t3
, tcg_env
, cofs
+ i
);
1167 fni(vece
, t0
, t1
, t2
, t3
, c
);
1168 tcg_gen_st_vec(t0
, tcg_env
, dofs
+ i
);
1172 /* Expand a vector two-operand operation. */
1173 void tcg_gen_gvec_2(uint32_t dofs
, uint32_t aofs
,
1174 uint32_t oprsz
, uint32_t maxsz
, const GVecGen2
*g
)
1176 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1177 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1181 check_size_align(oprsz
, maxsz
, dofs
| aofs
);
1182 check_overlap_2(dofs
, aofs
, maxsz
);
1186 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1190 /* Recall that ARM SVE allows vector sizes that are not a
1191 * power of 2, but always a multiple of 16. The intent is
1192 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1194 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1195 expand_2_vec(g
->vece
, dofs
, aofs
, some
, 32, TCG_TYPE_V256
,
1196 g
->load_dest
, g
->fniv
);
1197 if (some
== oprsz
) {
1206 expand_2_vec(g
->vece
, dofs
, aofs
, oprsz
, 16, TCG_TYPE_V128
,
1207 g
->load_dest
, g
->fniv
);
1210 expand_2_vec(g
->vece
, dofs
, aofs
, oprsz
, 8, TCG_TYPE_V64
,
1211 g
->load_dest
, g
->fniv
);
1215 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1216 expand_2_i64(dofs
, aofs
, oprsz
, g
->load_dest
, g
->fni8
);
1217 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1218 expand_2_i32(dofs
, aofs
, oprsz
, g
->load_dest
, g
->fni4
);
1220 assert(g
->fno
!= NULL
);
1221 tcg_gen_gvec_2_ool(dofs
, aofs
, oprsz
, maxsz
, g
->data
, g
->fno
);
1227 g_assert_not_reached();
1229 tcg_swap_vecop_list(hold_list
);
1231 if (oprsz
< maxsz
) {
1232 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1236 /* Expand a vector operation with two vectors and an immediate. */
1237 void tcg_gen_gvec_2i(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
1238 uint32_t maxsz
, int64_t c
, const GVecGen2i
*g
)
1240 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1241 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1245 check_size_align(oprsz
, maxsz
, dofs
| aofs
);
1246 check_overlap_2(dofs
, aofs
, maxsz
);
1250 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1254 /* Recall that ARM SVE allows vector sizes that are not a
1255 * power of 2, but always a multiple of 16. The intent is
1256 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1258 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1259 expand_2i_vec(g
->vece
, dofs
, aofs
, some
, 32, TCG_TYPE_V256
,
1260 c
, g
->load_dest
, g
->fniv
);
1261 if (some
== oprsz
) {
1270 expand_2i_vec(g
->vece
, dofs
, aofs
, oprsz
, 16, TCG_TYPE_V128
,
1271 c
, g
->load_dest
, g
->fniv
);
1274 expand_2i_vec(g
->vece
, dofs
, aofs
, oprsz
, 8, TCG_TYPE_V64
,
1275 c
, g
->load_dest
, g
->fniv
);
1279 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1280 expand_2i_i64(dofs
, aofs
, oprsz
, c
, g
->load_dest
, g
->fni8
);
1281 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1282 expand_2i_i32(dofs
, aofs
, oprsz
, c
, g
->load_dest
, g
->fni4
);
1285 tcg_gen_gvec_2_ool(dofs
, aofs
, oprsz
, maxsz
, c
, g
->fno
);
1287 TCGv_i64 tcg_c
= tcg_constant_i64(c
);
1288 tcg_gen_gvec_2i_ool(dofs
, aofs
, tcg_c
, oprsz
,
1296 g_assert_not_reached();
1298 tcg_swap_vecop_list(hold_list
);
1300 if (oprsz
< maxsz
) {
1301 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1305 /* Expand a vector operation with two vectors and a scalar. */
1306 void tcg_gen_gvec_2s(uint32_t dofs
, uint32_t aofs
, uint32_t oprsz
,
1307 uint32_t maxsz
, TCGv_i64 c
, const GVecGen2s
*g
)
1311 check_size_align(oprsz
, maxsz
, dofs
| aofs
);
1312 check_overlap_2(dofs
, aofs
, maxsz
);
1316 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1319 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1320 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1321 TCGv_vec t_vec
= tcg_temp_new_vec(type
);
1324 tcg_gen_dup_i64_vec(g
->vece
, t_vec
, c
);
1328 /* Recall that ARM SVE allows vector sizes that are not a
1329 * power of 2, but always a multiple of 16. The intent is
1330 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1332 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1333 expand_2s_vec(g
->vece
, dofs
, aofs
, some
, 32, TCG_TYPE_V256
,
1334 t_vec
, g
->scalar_first
, g
->fniv
);
1335 if (some
== oprsz
) {
1345 expand_2s_vec(g
->vece
, dofs
, aofs
, oprsz
, 16, TCG_TYPE_V128
,
1346 t_vec
, g
->scalar_first
, g
->fniv
);
1350 expand_2s_vec(g
->vece
, dofs
, aofs
, oprsz
, 8, TCG_TYPE_V64
,
1351 t_vec
, g
->scalar_first
, g
->fniv
);
1355 g_assert_not_reached();
1357 tcg_temp_free_vec(t_vec
);
1358 tcg_swap_vecop_list(hold_list
);
1359 } else if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1360 TCGv_i64 t64
= tcg_temp_new_i64();
1362 tcg_gen_dup_i64(g
->vece
, t64
, c
);
1363 expand_2s_i64(dofs
, aofs
, oprsz
, t64
, g
->scalar_first
, g
->fni8
);
1364 tcg_temp_free_i64(t64
);
1365 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1366 TCGv_i32 t32
= tcg_temp_new_i32();
1368 tcg_gen_extrl_i64_i32(t32
, c
);
1369 tcg_gen_dup_i32(g
->vece
, t32
, t32
);
1370 expand_2s_i32(dofs
, aofs
, oprsz
, t32
, g
->scalar_first
, g
->fni4
);
1371 tcg_temp_free_i32(t32
);
1373 tcg_gen_gvec_2i_ool(dofs
, aofs
, c
, oprsz
, maxsz
, 0, g
->fno
);
1377 if (oprsz
< maxsz
) {
1378 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1382 /* Expand a vector three-operand operation. */
1383 void tcg_gen_gvec_3(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
1384 uint32_t oprsz
, uint32_t maxsz
, const GVecGen3
*g
)
1386 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1387 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1391 check_size_align(oprsz
, maxsz
, dofs
| aofs
| bofs
);
1392 check_overlap_3(dofs
, aofs
, bofs
, maxsz
);
1396 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1400 /* Recall that ARM SVE allows vector sizes that are not a
1401 * power of 2, but always a multiple of 16. The intent is
1402 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1404 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1405 expand_3_vec(g
->vece
, dofs
, aofs
, bofs
, some
, 32, TCG_TYPE_V256
,
1406 g
->load_dest
, g
->fniv
);
1407 if (some
== oprsz
) {
1417 expand_3_vec(g
->vece
, dofs
, aofs
, bofs
, oprsz
, 16, TCG_TYPE_V128
,
1418 g
->load_dest
, g
->fniv
);
1421 expand_3_vec(g
->vece
, dofs
, aofs
, bofs
, oprsz
, 8, TCG_TYPE_V64
,
1422 g
->load_dest
, g
->fniv
);
1426 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1427 expand_3_i64(dofs
, aofs
, bofs
, oprsz
, g
->load_dest
, g
->fni8
);
1428 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1429 expand_3_i32(dofs
, aofs
, bofs
, oprsz
, g
->load_dest
, g
->fni4
);
1431 assert(g
->fno
!= NULL
);
1432 tcg_gen_gvec_3_ool(dofs
, aofs
, bofs
, oprsz
,
1433 maxsz
, g
->data
, g
->fno
);
1439 g_assert_not_reached();
1441 tcg_swap_vecop_list(hold_list
);
1443 if (oprsz
< maxsz
) {
1444 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1448 /* Expand a vector operation with three vectors and an immediate. */
1449 void tcg_gen_gvec_3i(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
,
1450 uint32_t oprsz
, uint32_t maxsz
, int64_t c
,
1453 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1454 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1458 check_size_align(oprsz
, maxsz
, dofs
| aofs
| bofs
);
1459 check_overlap_3(dofs
, aofs
, bofs
, maxsz
);
1463 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1468 * Recall that ARM SVE allows vector sizes that are not a
1469 * power of 2, but always a multiple of 16. The intent is
1470 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1472 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1473 expand_3i_vec(g
->vece
, dofs
, aofs
, bofs
, some
, 32, TCG_TYPE_V256
,
1474 c
, g
->load_dest
, g
->fniv
);
1475 if (some
== oprsz
) {
1485 expand_3i_vec(g
->vece
, dofs
, aofs
, bofs
, oprsz
, 16, TCG_TYPE_V128
,
1486 c
, g
->load_dest
, g
->fniv
);
1489 expand_3i_vec(g
->vece
, dofs
, aofs
, bofs
, oprsz
, 8, TCG_TYPE_V64
,
1490 c
, g
->load_dest
, g
->fniv
);
1494 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1495 expand_3i_i64(dofs
, aofs
, bofs
, oprsz
, c
, g
->load_dest
, g
->fni8
);
1496 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1497 expand_3i_i32(dofs
, aofs
, bofs
, oprsz
, c
, g
->load_dest
, g
->fni4
);
1499 assert(g
->fno
!= NULL
);
1500 tcg_gen_gvec_3_ool(dofs
, aofs
, bofs
, oprsz
, maxsz
, c
, g
->fno
);
1506 g_assert_not_reached();
1508 tcg_swap_vecop_list(hold_list
);
1510 if (oprsz
< maxsz
) {
1511 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1515 /* Expand a vector four-operand operation. */
1516 void tcg_gen_gvec_4(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
, uint32_t cofs
,
1517 uint32_t oprsz
, uint32_t maxsz
, const GVecGen4
*g
)
1519 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1520 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1524 check_size_align(oprsz
, maxsz
, dofs
| aofs
| bofs
| cofs
);
1525 check_overlap_4(dofs
, aofs
, bofs
, cofs
, maxsz
);
1529 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1533 /* Recall that ARM SVE allows vector sizes that are not a
1534 * power of 2, but always a multiple of 16. The intent is
1535 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1537 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1538 expand_4_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, some
,
1539 32, TCG_TYPE_V256
, g
->write_aofs
, g
->fniv
);
1540 if (some
== oprsz
) {
1551 expand_4_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, oprsz
,
1552 16, TCG_TYPE_V128
, g
->write_aofs
, g
->fniv
);
1555 expand_4_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, oprsz
,
1556 8, TCG_TYPE_V64
, g
->write_aofs
, g
->fniv
);
1560 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1561 expand_4_i64(dofs
, aofs
, bofs
, cofs
, oprsz
,
1562 g
->write_aofs
, g
->fni8
);
1563 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1564 expand_4_i32(dofs
, aofs
, bofs
, cofs
, oprsz
,
1565 g
->write_aofs
, g
->fni4
);
1567 assert(g
->fno
!= NULL
);
1568 tcg_gen_gvec_4_ool(dofs
, aofs
, bofs
, cofs
,
1569 oprsz
, maxsz
, g
->data
, g
->fno
);
1575 g_assert_not_reached();
1577 tcg_swap_vecop_list(hold_list
);
1579 if (oprsz
< maxsz
) {
1580 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1584 /* Expand a vector four-operand operation. */
1585 void tcg_gen_gvec_4i(uint32_t dofs
, uint32_t aofs
, uint32_t bofs
, uint32_t cofs
,
1586 uint32_t oprsz
, uint32_t maxsz
, int64_t c
,
1589 const TCGOpcode
*this_list
= g
->opt_opc
? : vecop_list_empty
;
1590 const TCGOpcode
*hold_list
= tcg_swap_vecop_list(this_list
);
1594 check_size_align(oprsz
, maxsz
, dofs
| aofs
| bofs
| cofs
);
1595 check_overlap_4(dofs
, aofs
, bofs
, cofs
, maxsz
);
1599 type
= choose_vector_type(g
->opt_opc
, g
->vece
, oprsz
, g
->prefer_i64
);
1604 * Recall that ARM SVE allows vector sizes that are not a
1605 * power of 2, but always a multiple of 16. The intent is
1606 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1608 some
= QEMU_ALIGN_DOWN(oprsz
, 32);
1609 expand_4i_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, some
,
1610 32, TCG_TYPE_V256
, c
, g
->fniv
);
1611 if (some
== oprsz
) {
1622 expand_4i_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, oprsz
,
1623 16, TCG_TYPE_V128
, c
, g
->fniv
);
1626 expand_4i_vec(g
->vece
, dofs
, aofs
, bofs
, cofs
, oprsz
,
1627 8, TCG_TYPE_V64
, c
, g
->fniv
);
1631 if (g
->fni8
&& check_size_impl(oprsz
, 8)) {
1632 expand_4i_i64(dofs
, aofs
, bofs
, cofs
, oprsz
, c
, g
->fni8
);
1633 } else if (g
->fni4
&& check_size_impl(oprsz
, 4)) {
1634 expand_4i_i32(dofs
, aofs
, bofs
, cofs
, oprsz
, c
, g
->fni4
);
1636 assert(g
->fno
!= NULL
);
1637 tcg_gen_gvec_4_ool(dofs
, aofs
, bofs
, cofs
,
1638 oprsz
, maxsz
, c
, g
->fno
);
1644 g_assert_not_reached();
1646 tcg_swap_vecop_list(hold_list
);
1648 if (oprsz
< maxsz
) {
1649 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1654 * Expand specific vector operations.
1657 static void vec_mov2(unsigned vece
, TCGv_vec a
, TCGv_vec b
)
1659 tcg_gen_mov_vec(a
, b
);
1662 void tcg_gen_gvec_mov(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1663 uint32_t oprsz
, uint32_t maxsz
)
1665 static const GVecGen2 g
= {
1666 .fni8
= tcg_gen_mov_i64
,
1668 .fno
= gen_helper_gvec_mov
,
1669 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1672 tcg_gen_gvec_2(dofs
, aofs
, oprsz
, maxsz
, &g
);
1674 check_size_align(oprsz
, maxsz
, dofs
);
1675 if (oprsz
< maxsz
) {
1676 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1681 void tcg_gen_gvec_dup_i32(unsigned vece
, uint32_t dofs
, uint32_t oprsz
,
1682 uint32_t maxsz
, TCGv_i32 in
)
1684 check_size_align(oprsz
, maxsz
, dofs
);
1685 tcg_debug_assert(vece
<= MO_32
);
1686 do_dup(vece
, dofs
, oprsz
, maxsz
, in
, NULL
, 0);
1689 void tcg_gen_gvec_dup_i64(unsigned vece
, uint32_t dofs
, uint32_t oprsz
,
1690 uint32_t maxsz
, TCGv_i64 in
)
1692 check_size_align(oprsz
, maxsz
, dofs
);
1693 tcg_debug_assert(vece
<= MO_64
);
1694 do_dup(vece
, dofs
, oprsz
, maxsz
, NULL
, in
, 0);
1697 void tcg_gen_gvec_dup_mem(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1698 uint32_t oprsz
, uint32_t maxsz
)
1700 check_size_align(oprsz
, maxsz
, dofs
);
1701 if (vece
<= MO_64
) {
1702 TCGType type
= choose_vector_type(NULL
, vece
, oprsz
, 0);
1704 TCGv_vec t_vec
= tcg_temp_new_vec(type
);
1705 tcg_gen_dup_mem_vec(vece
, t_vec
, tcg_env
, aofs
);
1706 do_dup_store(type
, dofs
, oprsz
, maxsz
, t_vec
);
1707 } else if (vece
<= MO_32
) {
1708 TCGv_i32 in
= tcg_temp_ebb_new_i32();
1711 tcg_gen_ld8u_i32(in
, tcg_env
, aofs
);
1714 tcg_gen_ld16u_i32(in
, tcg_env
, aofs
);
1717 tcg_gen_ld_i32(in
, tcg_env
, aofs
);
1720 do_dup(vece
, dofs
, oprsz
, maxsz
, in
, NULL
, 0);
1721 tcg_temp_free_i32(in
);
1723 TCGv_i64 in
= tcg_temp_ebb_new_i64();
1724 tcg_gen_ld_i64(in
, tcg_env
, aofs
);
1725 do_dup(vece
, dofs
, oprsz
, maxsz
, NULL
, in
, 0);
1726 tcg_temp_free_i64(in
);
1728 } else if (vece
== 4) {
1729 /* 128-bit duplicate. */
1732 tcg_debug_assert(oprsz
>= 16);
1733 if (TCG_TARGET_HAS_v128
) {
1734 TCGv_vec in
= tcg_temp_new_vec(TCG_TYPE_V128
);
1736 tcg_gen_ld_vec(in
, tcg_env
, aofs
);
1737 for (i
= (aofs
== dofs
) * 16; i
< oprsz
; i
+= 16) {
1738 tcg_gen_st_vec(in
, tcg_env
, dofs
+ i
);
1741 TCGv_i64 in0
= tcg_temp_ebb_new_i64();
1742 TCGv_i64 in1
= tcg_temp_ebb_new_i64();
1744 tcg_gen_ld_i64(in0
, tcg_env
, aofs
);
1745 tcg_gen_ld_i64(in1
, tcg_env
, aofs
+ 8);
1746 for (i
= (aofs
== dofs
) * 16; i
< oprsz
; i
+= 16) {
1747 tcg_gen_st_i64(in0
, tcg_env
, dofs
+ i
);
1748 tcg_gen_st_i64(in1
, tcg_env
, dofs
+ i
+ 8);
1750 tcg_temp_free_i64(in0
);
1751 tcg_temp_free_i64(in1
);
1753 if (oprsz
< maxsz
) {
1754 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1756 } else if (vece
== 5) {
1757 /* 256-bit duplicate. */
1760 tcg_debug_assert(oprsz
>= 32);
1761 tcg_debug_assert(oprsz
% 32 == 0);
1762 if (TCG_TARGET_HAS_v256
) {
1763 TCGv_vec in
= tcg_temp_new_vec(TCG_TYPE_V256
);
1765 tcg_gen_ld_vec(in
, tcg_env
, aofs
);
1766 for (i
= (aofs
== dofs
) * 32; i
< oprsz
; i
+= 32) {
1767 tcg_gen_st_vec(in
, tcg_env
, dofs
+ i
);
1769 } else if (TCG_TARGET_HAS_v128
) {
1770 TCGv_vec in0
= tcg_temp_new_vec(TCG_TYPE_V128
);
1771 TCGv_vec in1
= tcg_temp_new_vec(TCG_TYPE_V128
);
1773 tcg_gen_ld_vec(in0
, tcg_env
, aofs
);
1774 tcg_gen_ld_vec(in1
, tcg_env
, aofs
+ 16);
1775 for (i
= (aofs
== dofs
) * 32; i
< oprsz
; i
+= 32) {
1776 tcg_gen_st_vec(in0
, tcg_env
, dofs
+ i
);
1777 tcg_gen_st_vec(in1
, tcg_env
, dofs
+ i
+ 16);
1783 for (j
= 0; j
< 4; ++j
) {
1784 in
[j
] = tcg_temp_ebb_new_i64();
1785 tcg_gen_ld_i64(in
[j
], tcg_env
, aofs
+ j
* 8);
1787 for (i
= (aofs
== dofs
) * 32; i
< oprsz
; i
+= 32) {
1788 for (j
= 0; j
< 4; ++j
) {
1789 tcg_gen_st_i64(in
[j
], tcg_env
, dofs
+ i
+ j
* 8);
1792 for (j
= 0; j
< 4; ++j
) {
1793 tcg_temp_free_i64(in
[j
]);
1796 if (oprsz
< maxsz
) {
1797 expand_clr(dofs
+ oprsz
, maxsz
- oprsz
);
1800 g_assert_not_reached();
void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
                          uint32_t maxsz, uint64_t x)
{
    check_size_align(oprsz, maxsz, dofs);
    do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
}
void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2 g = {
        .fni8 = tcg_gen_not_i64,
        .fniv = tcg_gen_not_vec,
        .fno = gen_helper_gvec_not,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
}
/* Perform a vector addition using normal addition and a mask.  The mask
   should be the sign bit of each lane.  This 6-operation form is more
   efficient than separate additions when there are 4 or more lanes in
   the 64-bit operation.  */
static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();
    TCGv_i64 t3 = tcg_temp_ebb_new_i64();

    tcg_gen_andc_i64(t1, a, m);
    tcg_gen_andc_i64(t2, b, m);
    tcg_gen_xor_i64(t3, a, b);
    tcg_gen_add_i64(d, t1, t2);
    tcg_gen_and_i64(t3, t3, m);
    tcg_gen_xor_i64(d, d, t3);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}
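
/*
 * Worked example (illustrative, not part of the original source), for
 * VECE == MO_8 where the mask is 0x8080808080808080: clearing the sign
 * bit of every byte in A and B guarantees that the per-byte sums of
 * t1 + t2 cannot carry into the neighbouring lane.  The sign bit of
 * each byte is then fixed up by xor'ing in (a ^ b) & 0x80.  E.g. for
 * single bytes a = 0xff, b = 0x01:
 *   t1 = 0x7f, t2 = 0x01, t1 + t2 = 0x80
 *   (a ^ b) & 0x80 = 0x80, so d = 0x80 ^ 0x80 = 0x00 = (0xff + 0x01) & 0xff
 * with no carry leaking into the next byte.
 */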
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
    gen_addv_mask(d, a, b, m);
}

void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();
    TCGv_i32 t3 = tcg_temp_ebb_new_i32();

    tcg_gen_andc_i32(t1, a, m);
    tcg_gen_andc_i32(t2, b, m);
    tcg_gen_xor_i32(t3, a, b);
    tcg_gen_add_i32(d, t1, t2);
    tcg_gen_and_i32(t3, t3, m);
    tcg_gen_xor_i32(d, d, t3);

    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
}

void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
    gen_addv_mask(d, a, b, m);
}

void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t1, a, ~0xffff);
    tcg_gen_add_i32(t2, a, b);
    tcg_gen_add_i32(t1, t1, b);
    tcg_gen_deposit_i32(d, t1, t2, 0, 16);

    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t1, a, ~0xffffffffull);
    tcg_gen_add_i64(t2, a, b);
    tcg_gen_add_i64(t1, t1, b);
    tcg_gen_deposit_i64(d, t1, t2, 0, 32);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}
static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 };

void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g[4] = {
        { .fni8 = tcg_gen_vec_add8_i64,
          .fniv = tcg_gen_add_vec,
          .fno = gen_helper_gvec_add8,
          .opt_opc = vecop_list_add,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_add16_i64,
          .fniv = tcg_gen_add_vec,
          .fno = gen_helper_gvec_add16,
          .opt_opc = vecop_list_add,
          .vece = MO_16 },
        { .fni4 = tcg_gen_add_i32,
          .fniv = tcg_gen_add_vec,
          .fno = gen_helper_gvec_add32,
          .opt_opc = vecop_list_add,
          .vece = MO_32 },
        { .fni8 = tcg_gen_add_i64,
          .fniv = tcg_gen_add_vec,
          .fno = gen_helper_gvec_add64,
          .opt_opc = vecop_list_add,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}
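
/*
 * Usage sketch (illustrative only): given DOFS/AOFS/BOFS, the env
 * offsets of three 16-byte guest vector registers, a target front end
 * emits
 *
 *   tcg_gen_gvec_add(MO_32, dofs, aofs, bofs, 16, 16);
 *
 * and the table above selects .fni4, .fniv or .fno according to what
 * the host backend can do for 32-bit elements.
 */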
1937 void tcg_gen_gvec_adds(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1938 TCGv_i64 c
, uint32_t oprsz
, uint32_t maxsz
)
1940 static const GVecGen2s g
[4] = {
1941 { .fni8
= tcg_gen_vec_add8_i64
,
1942 .fniv
= tcg_gen_add_vec
,
1943 .fno
= gen_helper_gvec_adds8
,
1944 .opt_opc
= vecop_list_add
,
1946 { .fni8
= tcg_gen_vec_add16_i64
,
1947 .fniv
= tcg_gen_add_vec
,
1948 .fno
= gen_helper_gvec_adds16
,
1949 .opt_opc
= vecop_list_add
,
1951 { .fni4
= tcg_gen_add_i32
,
1952 .fniv
= tcg_gen_add_vec
,
1953 .fno
= gen_helper_gvec_adds32
,
1954 .opt_opc
= vecop_list_add
,
1956 { .fni8
= tcg_gen_add_i64
,
1957 .fniv
= tcg_gen_add_vec
,
1958 .fno
= gen_helper_gvec_adds64
,
1959 .opt_opc
= vecop_list_add
,
1960 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
1964 tcg_debug_assert(vece
<= MO_64
);
1965 tcg_gen_gvec_2s(dofs
, aofs
, oprsz
, maxsz
, c
, &g
[vece
]);
1968 void tcg_gen_gvec_addi(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1969 int64_t c
, uint32_t oprsz
, uint32_t maxsz
)
1971 TCGv_i64 tmp
= tcg_constant_i64(c
);
1972 tcg_gen_gvec_adds(vece
, dofs
, aofs
, tmp
, oprsz
, maxsz
);
1975 static const TCGOpcode vecop_list_sub
[] = { INDEX_op_sub_vec
, 0 };
1977 void tcg_gen_gvec_subs(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
1978 TCGv_i64 c
, uint32_t oprsz
, uint32_t maxsz
)
1980 static const GVecGen2s g
[4] = {
1981 { .fni8
= tcg_gen_vec_sub8_i64
,
1982 .fniv
= tcg_gen_sub_vec
,
1983 .fno
= gen_helper_gvec_subs8
,
1984 .opt_opc
= vecop_list_sub
,
1986 { .fni8
= tcg_gen_vec_sub16_i64
,
1987 .fniv
= tcg_gen_sub_vec
,
1988 .fno
= gen_helper_gvec_subs16
,
1989 .opt_opc
= vecop_list_sub
,
1991 { .fni4
= tcg_gen_sub_i32
,
1992 .fniv
= tcg_gen_sub_vec
,
1993 .fno
= gen_helper_gvec_subs32
,
1994 .opt_opc
= vecop_list_sub
,
1996 { .fni8
= tcg_gen_sub_i64
,
1997 .fniv
= tcg_gen_sub_vec
,
1998 .fno
= gen_helper_gvec_subs64
,
1999 .opt_opc
= vecop_list_sub
,
2000 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
2004 tcg_debug_assert(vece
<= MO_64
);
2005 tcg_gen_gvec_2s(dofs
, aofs
, oprsz
, maxsz
, c
, &g
[vece
]);
2008 /* Perform a vector subtraction using normal subtraction and a mask.
2009 Compare gen_addv_mask above. */
2010 static void gen_subv_mask(TCGv_i64 d
, TCGv_i64 a
, TCGv_i64 b
, TCGv_i64 m
)
2012 TCGv_i64 t1
= tcg_temp_ebb_new_i64();
2013 TCGv_i64 t2
= tcg_temp_ebb_new_i64();
2014 TCGv_i64 t3
= tcg_temp_ebb_new_i64();
2016 tcg_gen_or_i64(t1
, a
, m
);
2017 tcg_gen_andc_i64(t2
, b
, m
);
2018 tcg_gen_eqv_i64(t3
, a
, b
);
2019 tcg_gen_sub_i64(d
, t1
, t2
);
2020 tcg_gen_and_i64(t3
, t3
, m
);
2021 tcg_gen_xor_i64(d
, d
, t3
);
2023 tcg_temp_free_i64(t1
);
2024 tcg_temp_free_i64(t2
);
2025 tcg_temp_free_i64(t3
);
2028 void tcg_gen_vec_sub8_i64(TCGv_i64 d
, TCGv_i64 a
, TCGv_i64 b
)
2030 TCGv_i64 m
= tcg_constant_i64(dup_const(MO_8
, 0x80));
2031 gen_subv_mask(d
, a
, b
, m
);
2034 void tcg_gen_vec_sub8_i32(TCGv_i32 d
, TCGv_i32 a
, TCGv_i32 b
)
2036 TCGv_i32 m
= tcg_constant_i32((int32_t)dup_const(MO_8
, 0x80));
2037 TCGv_i32 t1
= tcg_temp_ebb_new_i32();
2038 TCGv_i32 t2
= tcg_temp_ebb_new_i32();
2039 TCGv_i32 t3
= tcg_temp_ebb_new_i32();
2041 tcg_gen_or_i32(t1
, a
, m
);
2042 tcg_gen_andc_i32(t2
, b
, m
);
2043 tcg_gen_eqv_i32(t3
, a
, b
);
2044 tcg_gen_sub_i32(d
, t1
, t2
);
2045 tcg_gen_and_i32(t3
, t3
, m
);
2046 tcg_gen_xor_i32(d
, d
, t3
);
2048 tcg_temp_free_i32(t1
);
2049 tcg_temp_free_i32(t2
);
2050 tcg_temp_free_i32(t3
);
2053 void tcg_gen_vec_sub16_i64(TCGv_i64 d
, TCGv_i64 a
, TCGv_i64 b
)
2055 TCGv_i64 m
= tcg_constant_i64(dup_const(MO_16
, 0x8000));
2056 gen_subv_mask(d
, a
, b
, m
);
2059 void tcg_gen_vec_sub16_i32(TCGv_i32 d
, TCGv_i32 a
, TCGv_i32 b
)
2061 TCGv_i32 t1
= tcg_temp_ebb_new_i32();
2062 TCGv_i32 t2
= tcg_temp_ebb_new_i32();
2064 tcg_gen_andi_i32(t1
, b
, ~0xffff);
2065 tcg_gen_sub_i32(t2
, a
, b
);
2066 tcg_gen_sub_i32(t1
, a
, t1
);
2067 tcg_gen_deposit_i32(d
, t1
, t2
, 0, 16);
2069 tcg_temp_free_i32(t1
);
2070 tcg_temp_free_i32(t2
);
2073 void tcg_gen_vec_sub32_i64(TCGv_i64 d
, TCGv_i64 a
, TCGv_i64 b
)
2075 TCGv_i64 t1
= tcg_temp_ebb_new_i64();
2076 TCGv_i64 t2
= tcg_temp_ebb_new_i64();
2078 tcg_gen_andi_i64(t1
, b
, ~0xffffffffull
);
2079 tcg_gen_sub_i64(t2
, a
, b
);
2080 tcg_gen_sub_i64(t1
, a
, t1
);
2081 tcg_gen_deposit_i64(d
, t1
, t2
, 0, 32);
2083 tcg_temp_free_i64(t1
);
2084 tcg_temp_free_i64(t2
);
2087 void tcg_gen_gvec_sub(unsigned vece
, uint32_t dofs
, uint32_t aofs
,
2088 uint32_t bofs
, uint32_t oprsz
, uint32_t maxsz
)
2090 static const GVecGen3 g
[4] = {
2091 { .fni8
= tcg_gen_vec_sub8_i64
,
2092 .fniv
= tcg_gen_sub_vec
,
2093 .fno
= gen_helper_gvec_sub8
,
2094 .opt_opc
= vecop_list_sub
,
2096 { .fni8
= tcg_gen_vec_sub16_i64
,
2097 .fniv
= tcg_gen_sub_vec
,
2098 .fno
= gen_helper_gvec_sub16
,
2099 .opt_opc
= vecop_list_sub
,
2101 { .fni4
= tcg_gen_sub_i32
,
2102 .fniv
= tcg_gen_sub_vec
,
2103 .fno
= gen_helper_gvec_sub32
,
2104 .opt_opc
= vecop_list_sub
,
2106 { .fni8
= tcg_gen_sub_i64
,
2107 .fniv
= tcg_gen_sub_vec
,
2108 .fno
= gen_helper_gvec_sub64
,
2109 .opt_opc
= vecop_list_sub
,
2110 .prefer_i64
= TCG_TARGET_REG_BITS
== 64,
2114 tcg_debug_assert(vece
<= MO_64
);
2115 tcg_gen_gvec_3(dofs
, aofs
, bofs
, oprsz
, maxsz
, &g
[vece
]);
static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 };

void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul8,
          .opt_opc = vecop_list_mul,
          .vece = MO_8 },
        { .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul16,
          .opt_opc = vecop_list_mul,
          .vece = MO_16 },
        { .fni4 = tcg_gen_mul_i32,
          .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul32,
          .opt_opc = vecop_list_mul,
          .vece = MO_32 },
        { .fni8 = tcg_gen_mul_i64,
          .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul64,
          .opt_opc = vecop_list_mul,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2s g[4] = {
        { .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_muls8,
          .opt_opc = vecop_list_mul,
          .vece = MO_8 },
        { .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_muls16,
          .opt_opc = vecop_list_mul,
          .vece = MO_16 },
        { .fni4 = tcg_gen_mul_i32,
          .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_muls32,
          .opt_opc = vecop_list_mul,
          .vece = MO_32 },
        { .fni8 = tcg_gen_mul_i64,
          .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_muls64,
          .opt_opc = vecop_list_mul,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
}

void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(c);
    tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
}

void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_ssadd_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_ssadd_vec,
          .fno = gen_helper_gvec_ssadd8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_ssadd_vec,
          .fno = gen_helper_gvec_ssadd16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = tcg_gen_ssadd_vec,
          .fno = gen_helper_gvec_ssadd32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = tcg_gen_ssadd_vec,
          .fno = gen_helper_gvec_ssadd64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sssub_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

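/*
 * Unsigned saturating addition of one element: after d = a + b, an
 * unsigned overflow is detected by d < a, in which case the result is
 * replaced with all-ones (the unsigned maximum).
 */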
static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 max = tcg_constant_i32(-1);
    tcg_gen_add_i32(d, a, b);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
}

static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 max = tcg_constant_i64(-1);
    tcg_gen_add_i64(d, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
}

void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_usadd_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_usadd_vec,
          .fno = gen_helper_gvec_usadd8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_usadd_vec,
          .fno = gen_helper_gvec_usadd16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_usadd_i32,
          .fniv = tcg_gen_usadd_vec,
          .fno = gen_helper_gvec_usadd32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_usadd_i64,
          .fniv = tcg_gen_usadd_vec,
          .fno = gen_helper_gvec_usadd64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

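/*
 * Unsigned saturating subtraction of one element: after d = a - b, the
 * result is replaced with zero whenever a < b, i.e. whenever the true
 * difference would be negative.
 */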
static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 min = tcg_constant_i32(0);
    tcg_gen_sub_i32(d, a, b);
    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
}

static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 min = tcg_constant_i64(0);
    tcg_gen_sub_i64(d, a, b);
    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
}

void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_ussub_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_ussub_vec,
          .fno = gen_helper_gvec_ussub8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_ussub_vec,
          .fno = gen_helper_gvec_ussub16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_ussub_i32,
          .fniv = tcg_gen_ussub_vec,
          .fno = gen_helper_gvec_ussub32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_ussub_i64,
          .fniv = tcg_gen_ussub_vec,
          .fno = gen_helper_gvec_ussub64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_smin_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_smin_vec,
          .fno = gen_helper_gvec_smin8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_smin_vec,
          .fno = gen_helper_gvec_smin16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_smin_i32,
          .fniv = tcg_gen_smin_vec,
          .fno = gen_helper_gvec_smin32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_smin_i64,
          .fniv = tcg_gen_smin_vec,
          .fno = gen_helper_gvec_smin64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_umin_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_umin_vec,
          .fno = gen_helper_gvec_umin8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_umin_vec,
          .fno = gen_helper_gvec_umin16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_umin_i32,
          .fniv = tcg_gen_umin_vec,
          .fno = gen_helper_gvec_umin32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_umin_i64,
          .fniv = tcg_gen_umin_vec,
          .fno = gen_helper_gvec_umin64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_smax_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_smax_i32,
          .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_smax_i64,
          .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_umax_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_umax_i32,
          .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_umax_i64,
          .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax64,
          .opt_opc = vecop_list,
          .vece = MO_64 },
    };
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

/* Perform a vector negation using normal negation and a mask.
   Compare gen_subv_mask above.  */
static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
{
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();
    TCGv_i64 t3 = tcg_temp_ebb_new_i64();

    tcg_gen_andc_i64(t3, m, b);
    tcg_gen_andc_i64(t2, b, m);
    tcg_gen_sub_i64(d, m, t2);
    tcg_gen_xor_i64(d, d, t3);

    tcg_temp_free_i64(t2);
    tcg_temp_free_i64(t3);
}

void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
{
    TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
    gen_negv_mask(d, b, m);
}

void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
{
    TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
    gen_negv_mask(d, b, m);
}

void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t1, b, ~0xffffffffull);
    tcg_gen_neg_i64(t2, b);
    tcg_gen_neg_i64(t1, t1);
    tcg_gen_deposit_i64(d, t1, t2, 0, 32);

    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
    static const GVecGen2 g[4] = {
        { .fni8 = tcg_gen_vec_neg8_i64,
          .fniv = tcg_gen_neg_vec,
          .fno = gen_helper_gvec_neg8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_neg16_i64,
          .fniv = tcg_gen_neg_vec,
          .fno = gen_helper_gvec_neg16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_neg_i32,
          .fniv = tcg_gen_neg_vec,
          .fno = gen_helper_gvec_neg32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_neg_i64,
          .fniv = tcg_gen_neg_vec,
          .fno = gen_helper_gvec_neg64,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
}

static void gen_absv_mask(TCGv_i64 d, TCGv_i64 b, unsigned vece)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();
    int nbit = 8 << vece;

    /* Create -1 for each negative element. */
    tcg_gen_shri_i64(t, b, nbit - 1);
    tcg_gen_andi_i64(t, t, dup_const(vece, 1));
    tcg_gen_muli_i64(t, t, (1 << nbit) - 1);

    /*
     * Invert (via xor -1) and add one.
     * Because of the ordering the msb is cleared,
     * so we never have carry into the next element.
     */
    tcg_gen_xor_i64(d, b, t);
    tcg_gen_andi_i64(t, t, dup_const(vece, 1));
    tcg_gen_add_i64(d, d, t);

    tcg_temp_free_i64(t);
}

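/*
 * Worked example of gen_absv_mask for vece == MO_8: a negative byte
 * yields t == 0xff in that lane, so d = (b ^ 0xff) + 1 == -b; a
 * non-negative byte yields t == 0 and passes through unchanged.
 */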
static void tcg_gen_vec_abs8_i64(TCGv_i64 d, TCGv_i64 b)
{
    gen_absv_mask(d, b, MO_8);
}

static void tcg_gen_vec_abs16_i64(TCGv_i64 d, TCGv_i64 b)
{
    gen_absv_mask(d, b, MO_16);
}

void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_abs_vec, 0 };
    static const GVecGen2 g[4] = {
        { .fni8 = tcg_gen_vec_abs8_i64,
          .fniv = tcg_gen_abs_vec,
          .fno = gen_helper_gvec_abs8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_abs16_i64,
          .fniv = tcg_gen_abs_vec,
          .fno = gen_helper_gvec_abs16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_abs_i32,
          .fniv = tcg_gen_abs_vec,
          .fno = gen_helper_gvec_abs32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_abs_i64,
          .fniv = tcg_gen_abs_vec,
          .fno = gen_helper_gvec_abs64,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
}

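/*
 * For the two-operand logicals below, the aliased case aofs == bofs is
 * special-cased: x & x and x | x are a plain move, x ^ x and x & ~x
 * are zero, x | ~x and eqv(x, x) are all-ones, and nand/nor degenerate
 * to not.
 */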
void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_and_i64,
        .fniv = tcg_gen_and_vec,
        .fno = gen_helper_gvec_and,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
                     uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_or_i64,
        .fniv = tcg_gen_or_vec,
        .fno = gen_helper_gvec_or,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_xor_i64,
        .fniv = tcg_gen_xor_vec,
        .fno = gen_helper_gvec_xor,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_andc_i64,
        .fniv = tcg_gen_andc_vec,
        .fno = gen_helper_gvec_andc,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_orc_i64,
        .fniv = tcg_gen_orc_vec,
        .fno = gen_helper_gvec_orc,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_nand(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_nand_i64,
        .fniv = tcg_gen_nand_vec,
        .fno = gen_helper_gvec_nand,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_not(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_nor(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_nor_i64,
        .fniv = tcg_gen_nor_vec,
        .fno = gen_helper_gvec_nor,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_not(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
                      uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 g = {
        .fni8 = tcg_gen_eqv_i64,
        .fniv = tcg_gen_eqv_vec,
        .fno = gen_helper_gvec_eqv,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (aofs == bofs) {
        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
    } else {
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }
}

static const GVecGen2s gop_ands = {
    .fni8 = tcg_gen_and_i64,
    .fniv = tcg_gen_and_vec,
    .fno = gen_helper_gvec_ands,
    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    .vece = MO_64
};

void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
    tcg_gen_dup_i64(vece, tmp, c);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
    tcg_temp_free_i64(tmp);
}

void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
}

void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    static GVecGen2s g = {
        .fni8 = tcg_gen_andc_i64,
        .fniv = tcg_gen_andc_vec,
        .fno = gen_helper_gvec_andcs,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .vece = MO_64
    };

    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
    tcg_gen_dup_i64(vece, tmp, c);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g);
    tcg_temp_free_i64(tmp);
}

static const GVecGen2s gop_xors = {
    .fni8 = tcg_gen_xor_i64,
    .fniv = tcg_gen_xor_vec,
    .fno = gen_helper_gvec_xors,
    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    .vece = MO_64
};

void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
    tcg_gen_dup_i64(vece, tmp, c);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
    tcg_temp_free_i64(tmp);
}

void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
}

static const GVecGen2s gop_ors = {
    .fni8 = tcg_gen_or_i64,
    .fniv = tcg_gen_or_vec,
    .fno = gen_helper_gvec_ors,
    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    .vece = MO_64
};

void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
                      TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
    tcg_gen_dup_i64(vece, tmp, c);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
    tcg_temp_free_i64(tmp);
}

void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
                      int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
}

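/*
 * Constant per-element shifts done on an integer register: shift the
 * whole word, then mask away the bits that crossed into a neighbouring
 * element.  E.g. for MO_8 with c == 3 the mask is 0xf8 replicated to
 * every byte.
 */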
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_8, 0xff << c);
    tcg_gen_shli_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_16, 0xffff << c);
    tcg_gen_shli_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t mask = dup_const(MO_8, 0xff << c);
    tcg_gen_shli_i32(d, a, c);
    tcg_gen_andi_i32(d, d, mask);
}

void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t mask = dup_const(MO_16, 0xffff << c);
    tcg_gen_shli_i32(d, a, c);
    tcg_gen_andi_i32(d, d, mask);
}

void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_shl8i_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl8i,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shl16i_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl16i,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shli_i32,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl32i,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shli_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl64i,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_8, 0xff >> c);
    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> c);
    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t mask = dup_const(MO_8, 0xff >> c);
    tcg_gen_shri_i32(d, a, c);
    tcg_gen_andi_i32(d, d, mask);
}

void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t mask = dup_const(MO_16, 0xffff >> c);
    tcg_gen_shri_i32(d, a, c);
    tcg_gen_andi_i32(d, d, mask);
}

void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_shr8i_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr8i,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shr16i_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr16i,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shri_i32,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr32i,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shri_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr64i,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

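/*
 * Constant per-element arithmetic shifts: perform a logical shift
 * right of the whole word, isolate each element's shifted-in sign bit,
 * and multiply it by (2 << c) - 2 to replicate it into the vacated bit
 * positions.  E.g. for MO_8 with c == 3 the sign bit lands at bit 4
 * (s_mask 0x10 per byte) and 0x10 * 14 == 0xe0 fills bits 7:5; the
 * product stays within the byte, so lanes do not interact.
 */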
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
    uint64_t c_mask = dup_const(MO_8, 0xff >> c);
    TCGv_i64 s = tcg_temp_ebb_new_i64();

    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_or_i64(d, d, s);         /* include sign extension */
    tcg_temp_free_i64(s);
}

void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
    uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
    TCGv_i64 s = tcg_temp_ebb_new_i64();

    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_or_i64(d, d, s);         /* include sign extension */
    tcg_temp_free_i64(s);
}

void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
    uint32_t c_mask = dup_const(MO_8, 0xff >> c);
    TCGv_i32 s = tcg_temp_ebb_new_i32();

    tcg_gen_shri_i32(d, a, c);
    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_or_i32(d, d, s);         /* include sign extension */
    tcg_temp_free_i32(s);
}

void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
{
    uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
    uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
    TCGv_i32 s = tcg_temp_ebb_new_i32();

    tcg_gen_shri_i32(d, a, c);
    tcg_gen_andi_i32(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_andi_i32(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_or_i32(d, d, s);         /* include sign extension */
    tcg_temp_free_i32(s);
}

void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sari_vec, 0 };
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_sar8i_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar8i,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_sar16i_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar16i,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_sari_i32,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar32i,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_sari_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar64i,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

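/*
 * Constant per-element rotates built from two shifts plus masks.
 * Note that these helpers use the input register A as a temporary:
 * it is clobbered by the right-shifted part of the rotate.
 */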
void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_8, 0xff << c);

    tcg_gen_shli_i64(d, a, c);
    tcg_gen_shri_i64(a, a, 8 - c);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(a, a, ~mask);
    tcg_gen_or_i64(d, d, a);
}

void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_16, 0xffff << c);

    tcg_gen_shli_i64(d, a, c);
    tcg_gen_shri_i64(a, a, 16 - c);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_andi_i64(a, a, ~mask);
    tcg_gen_or_i64(d, d, a);
}

void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_rotl8i_i64,
          .fniv = tcg_gen_rotli_vec,
          .fno = gen_helper_gvec_rotl8i,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_rotl16i_i64,
          .fniv = tcg_gen_rotli_vec,
          .fno = gen_helper_gvec_rotl16i,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_rotli_i32,
          .fniv = tcg_gen_rotli_vec,
          .fno = gen_helper_gvec_rotl32i,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_rotli_i64,
          .fniv = tcg_gen_rotli_vec,
          .fno = gen_helper_gvec_rotl64i,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
                       oprsz, maxsz);
}

/*
 * Specialized generation of vector shifts by a non-constant scalar.
 */
typedef struct {
    void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
    void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
    void (*fniv_s)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32);
    void (*fniv_v)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
    gen_helper_gvec_2 *fno[4];
    TCGOpcode s_list[2];
    TCGOpcode v_list[2];
} GVecGen2sh;

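/*
 * As used by do_gvec_shifts below: fniv_s expands with a vector
 * shift-by-scalar opcode (s_list), fniv_v duplicates the scalar into a
 * vector and uses a shift-by-vector opcode (v_list), fni4/fni8 are the
 * integer fallbacks for MO_32/MO_64 elements, and fno[vece] is the
 * out-of-line helper of last resort.
 */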
static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           uint32_t oprsz, uint32_t tysz, TCGType type,
                           TCGv_i32 shift,
                           void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32))
{
    for (uint32_t i = 0; i < oprsz; i += tysz) {
        TCGv_vec t0 = tcg_temp_new_vec(type);
        TCGv_vec t1 = tcg_temp_new_vec(type);

        tcg_gen_ld_vec(t0, tcg_env, aofs + i);
        fni(vece, t1, t0, shift);
        tcg_gen_st_vec(t1, tcg_env, dofs + i);
    }
}

static void
do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
               uint32_t oprsz, uint32_t maxsz, const GVecGen2sh *g)
{
    TCGType type;
    uint32_t some;

    check_size_align(oprsz, maxsz, dofs | aofs);
    check_overlap_2(dofs, aofs, maxsz);

    /* If the backend has a scalar expansion, great. */
    type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64);
    if (type) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        switch (type) {
        case TCG_TYPE_V256:
            some = QEMU_ALIGN_DOWN(oprsz, 32);
            expand_2sh_vec(vece, dofs, aofs, some, 32,
                           TCG_TYPE_V256, shift, g->fniv_s);
            if (some == oprsz) {
                break;
            }
            dofs += some;
            aofs += some;
            oprsz -= some;
            maxsz -= some;
            /* fallthru */
        case TCG_TYPE_V128:
            expand_2sh_vec(vece, dofs, aofs, oprsz, 16,
                           TCG_TYPE_V128, shift, g->fniv_s);
            break;
        case TCG_TYPE_V64:
            expand_2sh_vec(vece, dofs, aofs, oprsz, 8,
                           TCG_TYPE_V64, shift, g->fniv_s);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_swap_vecop_list(hold_list);
        goto clear_tail;
    }

    /* If the backend supports variable vector shifts, also cool. */
    type = choose_vector_type(g->v_list, vece, oprsz, vece == MO_64);
    if (type) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec v_shift = tcg_temp_new_vec(type);

        if (vece == MO_64) {
            TCGv_i64 sh64 = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(sh64, shift);
            tcg_gen_dup_i64_vec(MO_64, v_shift, sh64);
            tcg_temp_free_i64(sh64);
        } else {
            tcg_gen_dup_i32_vec(vece, v_shift, shift);
        }

        switch (type) {
        case TCG_TYPE_V256:
            some = QEMU_ALIGN_DOWN(oprsz, 32);
            expand_2s_vec(vece, dofs, aofs, some, 32, TCG_TYPE_V256,
                          v_shift, false, g->fniv_v);
            if (some == oprsz) {
                break;
            }
            dofs += some;
            aofs += some;
            oprsz -= some;
            maxsz -= some;
            /* fallthru */
        case TCG_TYPE_V128:
            expand_2s_vec(vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
                          v_shift, false, g->fniv_v);
            break;
        case TCG_TYPE_V64:
            expand_2s_vec(vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
                          v_shift, false, g->fniv_v);
            break;
        default:
            g_assert_not_reached();
        }
        tcg_temp_free_vec(v_shift);
        tcg_swap_vecop_list(hold_list);
        goto clear_tail;
    }

    /* Otherwise fall back to integral... */
    if (vece == MO_32 && check_size_impl(oprsz, 4)) {
        expand_2s_i32(dofs, aofs, oprsz, shift, false, g->fni4);
    } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
        TCGv_i64 sh64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(sh64, shift);
        expand_2s_i64(dofs, aofs, oprsz, sh64, false, g->fni8);
        tcg_temp_free_i64(sh64);
    } else {
        TCGv_ptr a0 = tcg_temp_ebb_new_ptr();
        TCGv_ptr a1 = tcg_temp_ebb_new_ptr();
        TCGv_i32 desc = tcg_temp_ebb_new_i32();

        tcg_gen_shli_i32(desc, shift, SIMD_DATA_SHIFT);
        tcg_gen_ori_i32(desc, desc, simd_desc(oprsz, maxsz, 0));
        tcg_gen_addi_ptr(a0, tcg_env, dofs);
        tcg_gen_addi_ptr(a1, tcg_env, aofs);

        g->fno[vece](a0, a1, desc);

        tcg_temp_free_ptr(a0);
        tcg_temp_free_ptr(a1);
        tcg_temp_free_i32(desc);
        return;
    }

 clear_tail:
    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);
    }
}

void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2sh g = {
        .fni4 = tcg_gen_shl_i32,
        .fni8 = tcg_gen_shl_i64,
        .fniv_s = tcg_gen_shls_vec,
        .fniv_v = tcg_gen_shlv_vec,
        .fno = {
            gen_helper_gvec_shl8i,
            gen_helper_gvec_shl16i,
            gen_helper_gvec_shl32i,
            gen_helper_gvec_shl64i,
        },
        .s_list = { INDEX_op_shls_vec, 0 },
        .v_list = { INDEX_op_shlv_vec, 0 },
    };

    tcg_debug_assert(vece <= MO_64);
    do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}

void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2sh g = {
        .fni4 = tcg_gen_shr_i32,
        .fni8 = tcg_gen_shr_i64,
        .fniv_s = tcg_gen_shrs_vec,
        .fniv_v = tcg_gen_shrv_vec,
        .fno = {
            gen_helper_gvec_shr8i,
            gen_helper_gvec_shr16i,
            gen_helper_gvec_shr32i,
            gen_helper_gvec_shr64i,
        },
        .s_list = { INDEX_op_shrs_vec, 0 },
        .v_list = { INDEX_op_shrv_vec, 0 },
    };

    tcg_debug_assert(vece <= MO_64);
    do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}

void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
                       TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2sh g = {
        .fni4 = tcg_gen_sar_i32,
        .fni8 = tcg_gen_sar_i64,
        .fniv_s = tcg_gen_sars_vec,
        .fniv_v = tcg_gen_sarv_vec,
        .fno = {
            gen_helper_gvec_sar8i,
            gen_helper_gvec_sar16i,
            gen_helper_gvec_sar32i,
            gen_helper_gvec_sar64i,
        },
        .s_list = { INDEX_op_sars_vec, 0 },
        .v_list = { INDEX_op_sarv_vec, 0 },
    };

    tcg_debug_assert(vece <= MO_64);
    do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}

void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2sh g = {
        .fni4 = tcg_gen_rotl_i32,
        .fni8 = tcg_gen_rotl_i64,
        .fniv_s = tcg_gen_rotls_vec,
        .fniv_v = tcg_gen_rotlv_vec,
        .fno = {
            gen_helper_gvec_rotl8i,
            gen_helper_gvec_rotl16i,
            gen_helper_gvec_rotl32i,
            gen_helper_gvec_rotl64i,
        },
        .s_list = { INDEX_op_rotls_vec, 0 },
        .v_list = { INDEX_op_rotlv_vec, 0 },
    };

    tcg_debug_assert(vece <= MO_64);
    do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
}

void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i32 tmp = tcg_temp_ebb_new_i32();

    tcg_gen_neg_i32(tmp, shift);
    tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
    tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
    tcg_temp_free_i32(tmp);
}

/*
 * Expand D = A << (B % element bits)
 *
 * Unlike scalar shifts, where the target front end can easily include
 * the modulo as part of the expansion, here the modulo is applied as
 * part of the operation.  If the target naturally includes the modulo
 * as part of the operation, great!  If the target has some other
 * behaviour for out-of-range shifts, then it could not use this
 * function anyway, and would need to do its own expansion with custom
 * functions.
 */
static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
                                 TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

    tcg_gen_and_vec(vece, t, b, m);
    tcg_gen_shlv_vec(vece, d, a, t);
    tcg_temp_free_vec(t);
}

static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t, b, 31);
    tcg_gen_shl_i32(d, a, t);
    tcg_temp_free_i32(t);
}

static void tcg_gen_shl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t, b, 63);
    tcg_gen_shl_i64(d, a, t);
    tcg_temp_free_i64(t);
}

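/*
 * The i32/i64 forms above are presumably only reached for MO_32 and
 * MO_64 respectively (see the .fni4/.fni8 assignments below), so the
 * word-size modulo (& 31, & 63) is also the per-element modulo.
 */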
void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shlv_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_shlv_mod_vec,
          .fno = gen_helper_gvec_shl8v,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_shlv_mod_vec,
          .fno = gen_helper_gvec_shl16v,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shl_mod_i32,
          .fniv = tcg_gen_shlv_mod_vec,
          .fno = gen_helper_gvec_shl32v,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shl_mod_i64,
          .fniv = tcg_gen_shlv_mod_vec,
          .fno = gen_helper_gvec_shl64v,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

/*
 * Similarly for logical right shifts.
 */
static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
                                 TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

    tcg_gen_and_vec(vece, t, b, m);
    tcg_gen_shrv_vec(vece, d, a, t);
    tcg_temp_free_vec(t);
}

static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t, b, 31);
    tcg_gen_shr_i32(d, a, t);
    tcg_temp_free_i32(t);
}

static void tcg_gen_shr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t, b, 63);
    tcg_gen_shr_i64(d, a, t);
    tcg_temp_free_i64(t);
}

void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_shrv_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_shrv_mod_vec,
          .fno = gen_helper_gvec_shr8v,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_shrv_mod_vec,
          .fno = gen_helper_gvec_shr16v,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shr_mod_i32,
          .fniv = tcg_gen_shrv_mod_vec,
          .fno = gen_helper_gvec_shr32v,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shr_mod_i64,
          .fniv = tcg_gen_shrv_mod_vec,
          .fno = gen_helper_gvec_shr64v,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

/*
 * Similarly for arithmetic right shifts.
 */
static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
                                 TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

    tcg_gen_and_vec(vece, t, b, m);
    tcg_gen_sarv_vec(vece, d, a, t);
    tcg_temp_free_vec(t);
}

static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t, b, 31);
    tcg_gen_sar_i32(d, a, t);
    tcg_temp_free_i32(t);
}

static void tcg_gen_sar_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t, b, 63);
    tcg_gen_sar_i64(d, a, t);
    tcg_temp_free_i64(t);
}

void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sarv_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_sarv_mod_vec,
          .fno = gen_helper_gvec_sar8v,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_sarv_mod_vec,
          .fno = gen_helper_gvec_sar16v,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_sar_mod_i32,
          .fniv = tcg_gen_sarv_mod_vec,
          .fno = gen_helper_gvec_sar32v,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_sar_mod_i64,
          .fniv = tcg_gen_sarv_mod_vec,
          .fno = gen_helper_gvec_sar64v,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

/*
 * Similarly for rotates.
 */
static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
                                  TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

    tcg_gen_and_vec(vece, t, b, m);
    tcg_gen_rotlv_vec(vece, d, a, t);
    tcg_temp_free_vec(t);
}

static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t, b, 31);
    tcg_gen_rotl_i32(d, a, t);
    tcg_temp_free_i32(t);
}

static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t, b, 63);
    tcg_gen_rotl_i64(d, a, t);
    tcg_temp_free_i64(t);
}

void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_rotlv_mod_vec,
          .fno = gen_helper_gvec_rotl8v,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_rotlv_mod_vec,
          .fno = gen_helper_gvec_rotl16v,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_rotl_mod_i32,
          .fniv = tcg_gen_rotlv_mod_vec,
          .fno = gen_helper_gvec_rotl32v,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_rotl_mod_i64,
          .fniv = tcg_gen_rotlv_mod_vec,
          .fno = gen_helper_gvec_rotl64v,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
                                  TCGv_vec a, TCGv_vec b)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);

    tcg_gen_and_vec(vece, t, b, m);
    tcg_gen_rotrv_vec(vece, d, a, t);
    tcg_temp_free_vec(t);
}

static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

    tcg_gen_andi_i32(t, b, 31);
    tcg_gen_rotr_i32(d, a, t);
    tcg_temp_free_i32(t);
}

static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_andi_i64(t, b, 63);
    tcg_gen_rotr_i64(d, a, t);
    tcg_temp_free_i64(t);
}

void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
    static const GVecGen3 g[4] = {
        { .fniv = tcg_gen_rotrv_mod_vec,
          .fno = gen_helper_gvec_rotr8v,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_rotrv_mod_vec,
          .fno = gen_helper_gvec_rotr16v,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = tcg_gen_rotr_mod_i32,
          .fniv = tcg_gen_rotrv_mod_vec,
          .fno = gen_helper_gvec_rotr32v,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_rotr_mod_i64,
          .fniv = tcg_gen_rotrv_mod_vec,
          .fno = gen_helper_gvec_rotr64v,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
}

/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                           uint32_t oprsz, TCGCond cond)
{
    TCGv_i32 t0 = tcg_temp_ebb_new_i32();
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    uint32_t i;

    for (i = 0; i < oprsz; i += 4) {
        tcg_gen_ld_i32(t0, tcg_env, aofs + i);
        tcg_gen_ld_i32(t1, tcg_env, bofs + i);
        tcg_gen_negsetcond_i32(cond, t0, t0, t1);
        tcg_gen_st_i32(t0, tcg_env, dofs + i);
    }
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t0);
}

static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                           uint32_t oprsz, TCGCond cond)
{
    TCGv_i64 t0 = tcg_temp_ebb_new_i64();
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    uint32_t i;

    for (i = 0; i < oprsz; i += 8) {
        tcg_gen_ld_i64(t0, tcg_env, aofs + i);
        tcg_gen_ld_i64(t1, tcg_env, bofs + i);
        tcg_gen_negsetcond_i64(cond, t0, t0, t1);
        tcg_gen_st_i64(t0, tcg_env, dofs + i);
    }
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t0);
}

static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           uint32_t bofs, uint32_t oprsz, uint32_t tysz,
                           TCGType type, TCGCond cond)
{
    for (uint32_t i = 0; i < oprsz; i += tysz) {
        TCGv_vec t0 = tcg_temp_new_vec(type);
        TCGv_vec t1 = tcg_temp_new_vec(type);
        TCGv_vec t2 = tcg_temp_new_vec(type);

        tcg_gen_ld_vec(t0, tcg_env, aofs + i);
        tcg_gen_ld_vec(t1, tcg_env, bofs + i);
        tcg_gen_cmp_vec(cond, vece, t2, t0, t1);
        tcg_gen_st_vec(t2, tcg_env, dofs + i);
    }
}

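/*
 * All of these expansions produce the usual gvec comparison result:
 * negsetcond and cmp_vec write -1 (all bits set) into an element when
 * the condition holds and 0 when it does not.
 */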
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                      uint32_t aofs, uint32_t bofs,
                      uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
    static gen_helper_gvec_3 * const eq_fn[4] = {
        gen_helper_gvec_eq8, gen_helper_gvec_eq16,
        gen_helper_gvec_eq32, gen_helper_gvec_eq64
    };
    static gen_helper_gvec_3 * const ne_fn[4] = {
        gen_helper_gvec_ne8, gen_helper_gvec_ne16,
        gen_helper_gvec_ne32, gen_helper_gvec_ne64
    };
    static gen_helper_gvec_3 * const lt_fn[4] = {
        gen_helper_gvec_lt8, gen_helper_gvec_lt16,
        gen_helper_gvec_lt32, gen_helper_gvec_lt64
    };
    static gen_helper_gvec_3 * const le_fn[4] = {
        gen_helper_gvec_le8, gen_helper_gvec_le16,
        gen_helper_gvec_le32, gen_helper_gvec_le64
    };
    static gen_helper_gvec_3 * const ltu_fn[4] = {
        gen_helper_gvec_ltu8, gen_helper_gvec_ltu16,
        gen_helper_gvec_ltu32, gen_helper_gvec_ltu64
    };
    static gen_helper_gvec_3 * const leu_fn[4] = {
        gen_helper_gvec_leu8, gen_helper_gvec_leu16,
        gen_helper_gvec_leu32, gen_helper_gvec_leu64
    };
    static gen_helper_gvec_3 * const * const fns[16] = {
        [TCG_COND_EQ] = eq_fn,
        [TCG_COND_NE] = ne_fn,
        [TCG_COND_LT] = lt_fn,
        [TCG_COND_LE] = le_fn,
        [TCG_COND_LTU] = ltu_fn,
        [TCG_COND_LEU] = leu_fn,
    };

    const TCGOpcode *hold_list;
    TCGType type;
    uint32_t some;

    check_size_align(oprsz, maxsz, dofs | aofs | bofs);
    check_overlap_3(dofs, aofs, bofs, maxsz);

    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
        do_dup(MO_8, dofs, oprsz, maxsz,
               NULL, NULL, -(cond == TCG_COND_ALWAYS));
        return;
    }

    /*
     * Implement inline with a vector type, if possible.
     * Prefer integer when 64-bit host and 64-bit comparison.
     */
    hold_list = tcg_swap_vecop_list(cmp_list);
    type = choose_vector_type(cmp_list, vece, oprsz,
                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
    switch (type) {
    case TCG_TYPE_V256:
        /* Recall that ARM SVE allows vector sizes that are not a
         * power of 2, but always a multiple of 16.  The intent is
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         */
        some = QEMU_ALIGN_DOWN(oprsz, 32);
        expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond);
        if (some == oprsz) {
            break;
        }
        dofs += some;
        aofs += some;
        bofs += some;
        oprsz -= some;
        maxsz -= some;
        /* fallthru */
    case TCG_TYPE_V128:
        expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond);
        break;
    case TCG_TYPE_V64:
        expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond);
        break;

    case 0:
        if (vece == MO_64 && check_size_impl(oprsz, 8)) {
            expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
        } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
            expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
        } else {
            gen_helper_gvec_3 * const *fn = fns[cond];

            if (fn == NULL) {
                uint32_t tmp;
                tmp = aofs, aofs = bofs, bofs = tmp;
                cond = tcg_swap_cond(cond);
                fn = fns[cond];
                assert(fn != NULL);
            }
            tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
            oprsz = maxsz;
        }
        break;

    default:
        g_assert_not_reached();
    }
    tcg_swap_vecop_list(hold_list);

    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);
    }
}

static void expand_cmps_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                            uint32_t oprsz, uint32_t tysz, TCGType type,
                            TCGCond cond, TCGv_vec c)
{
    TCGv_vec t0 = tcg_temp_new_vec(type);
    TCGv_vec t1 = tcg_temp_new_vec(type);
    uint32_t i;

    for (i = 0; i < oprsz; i += tysz) {
        tcg_gen_ld_vec(t1, tcg_env, aofs + i);
        tcg_gen_cmp_vec(cond, vece, t0, t1, c);
        tcg_gen_st_vec(t0, tcg_env, dofs + i);
    }
}

void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, TCGv_i64 c,
                       uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
    static gen_helper_gvec_2i * const eq_fn[4] = {
        gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
        gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
    };
    static gen_helper_gvec_2i * const lt_fn[4] = {
        gen_helper_gvec_lts8, gen_helper_gvec_lts16,
        gen_helper_gvec_lts32, gen_helper_gvec_lts64
    };
    static gen_helper_gvec_2i * const le_fn[4] = {
        gen_helper_gvec_les8, gen_helper_gvec_les16,
        gen_helper_gvec_les32, gen_helper_gvec_les64
    };
    static gen_helper_gvec_2i * const ltu_fn[4] = {
        gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
        gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
    };
    static gen_helper_gvec_2i * const leu_fn[4] = {
        gen_helper_gvec_leus8, gen_helper_gvec_leus16,
        gen_helper_gvec_leus32, gen_helper_gvec_leus64
    };
    static gen_helper_gvec_2i * const * const fns[16] = {
        [TCG_COND_EQ] = eq_fn,
        [TCG_COND_LT] = lt_fn,
        [TCG_COND_LE] = le_fn,
        [TCG_COND_LTU] = ltu_fn,
        [TCG_COND_LEU] = leu_fn,
    };

    TCGType type;

    check_size_align(oprsz, maxsz, dofs | aofs);
    check_overlap_2(dofs, aofs, maxsz);

    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
        do_dup(MO_8, dofs, oprsz, maxsz,
               NULL, NULL, -(cond == TCG_COND_ALWAYS));
        return;
    }

    /*
     * Implement inline with a vector type, if possible.
     * Prefer integer when 64-bit host and 64-bit comparison.
     */
    type = choose_vector_type(cmp_list, vece, oprsz,
                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
    if (type != 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
        TCGv_vec t_vec = tcg_temp_new_vec(type);
        uint32_t some;

        tcg_gen_dup_i64_vec(vece, t_vec, c);
        switch (type) {
        case TCG_TYPE_V256:
            some = QEMU_ALIGN_DOWN(oprsz, 32);
            expand_cmps_vec(vece, dofs, aofs, some, 32,
                            TCG_TYPE_V256, cond, t_vec);
            aofs += some;
            dofs += some;
            oprsz -= some;
            maxsz -= some;
            /* fallthru */

        case TCG_TYPE_V128:
            some = QEMU_ALIGN_DOWN(oprsz, 16);
            expand_cmps_vec(vece, dofs, aofs, some, 16,
                            TCG_TYPE_V128, cond, t_vec);
            break;

        case TCG_TYPE_V64:
            some = QEMU_ALIGN_DOWN(oprsz, 8);
            expand_cmps_vec(vece, dofs, aofs, some, 8,
                            TCG_TYPE_V64, cond, t_vec);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_vec(t_vec);
        tcg_swap_vecop_list(hold_list);
    } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        uint32_t i;

        for (i = 0; i < oprsz; i += 8) {
            tcg_gen_ld_i64(t0, tcg_env, aofs + i);
            tcg_gen_negsetcond_i64(cond, t0, t0, c);
            tcg_gen_st_i64(t0, tcg_env, dofs + i);
        }
        tcg_temp_free_i64(t0);
    } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        uint32_t i;

        tcg_gen_extrl_i64_i32(t1, c);
        for (i = 0; i < oprsz; i += 4) {
            tcg_gen_ld_i32(t0, tcg_env, aofs + i);
            tcg_gen_negsetcond_i32(cond, t0, t0, t1);
            tcg_gen_st_i32(t0, tcg_env, dofs + i);
        }
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    } else {
        gen_helper_gvec_2i * const *fn = fns[cond];
        bool inv = false;

        if (fn == NULL) {
            cond = tcg_invert_cond(cond);
            fn = fns[cond];
            assert(fn != NULL);
            inv = true;
        }

        tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
        return;
    }

    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);
    }
}

void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, int64_t c,
                       uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(c);
    tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
}

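/*
 * Bitwise select: each result bit is taken from B where the
 * corresponding bit of A is set and from C where it is clear,
 * i.e. d = (b & a) | (c & ~a).
 */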
static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_ebb_new_i64();

    tcg_gen_and_i64(t, b, a);
    tcg_gen_andc_i64(d, c, a);
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t bofs, uint32_t cofs,
                         uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 g = {
        .fni8 = tcg_gen_bitsel_i64,
        .fniv = tcg_gen_bitsel_vec,
        .fno = gen_helper_gvec_bitsel,
    };

    tcg_gen_gvec_4(dofs, aofs, bofs, cofs, oprsz, maxsz, &g);
}