tcg/tcg-op-gvec.c
1 /*
2 * Generic vector operation expansion
4 * Copyright (c) 2018 Linaro
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "tcg.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
27 #define MAX_UNROLL 4
29 /* Verify vector size and alignment rules. OFS should be the OR of all
30 of the operand offsets so that we can check them all at once. */
31 static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
33 uint32_t opr_align = oprsz >= 16 ? 15 : 7;
34 uint32_t max_align = maxsz >= 16 || oprsz >= 16 ? 15 : 7;
35 tcg_debug_assert(oprsz > 0);
36 tcg_debug_assert(oprsz <= maxsz);
37 tcg_debug_assert((oprsz & opr_align) == 0);
38 tcg_debug_assert((maxsz & max_align) == 0);
39 tcg_debug_assert((ofs & max_align) == 0);
42 /* Verify vector overlap rules for two operands. */
43 static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
45 tcg_debug_assert(d == a || d + s <= a || a + s <= d);
48 /* Verify vector overlap rules for three operands. */
49 static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
51 check_overlap_2(d, a, s);
52 check_overlap_2(d, b, s);
53 check_overlap_2(a, b, s);
56 /* Verify vector overlap rules for four operands. */
57 static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
58 uint32_t c, uint32_t s)
60 check_overlap_2(d, a, s);
61 check_overlap_2(d, b, s);
62 check_overlap_2(d, c, s);
63 check_overlap_2(a, b, s);
64 check_overlap_2(a, c, s);
65 check_overlap_2(b, c, s);
68 /* Create a descriptor from components. */
69 uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
71 uint32_t desc = 0;
73 assert(oprsz % 8 == 0 && oprsz <= (8 << SIMD_OPRSZ_BITS));
74 assert(maxsz % 8 == 0 && maxsz <= (8 << SIMD_MAXSZ_BITS));
75 assert(data == sextract32(data, 0, SIMD_DATA_BITS));
77 oprsz = (oprsz / 8) - 1;
78 maxsz = (maxsz / 8) - 1;
79 desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
80 desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
81 desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
83 return desc;
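/* Illustrative note (not in the original source): both sizes are packed as
 * (bytes / 8) - 1, so e.g. simd_desc(16, 16, 5) records 1 in the OPRSZ
 * field, 1 in the MAXSZ field and 5 in the DATA field; the helper side
 * recovers (1 + 1) * 8 = 16 bytes, and DATA is sign-extended on
 * extraction so small negative values round-trip as well.  */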
86 /* Generate a call to a gvec-style helper with two vector operands. */
87 void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
88 uint32_t oprsz, uint32_t maxsz, int32_t data,
89 gen_helper_gvec_2 *fn)
91 TCGv_ptr a0, a1;
92 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
94 a0 = tcg_temp_new_ptr();
95 a1 = tcg_temp_new_ptr();
97 tcg_gen_addi_ptr(a0, cpu_env, dofs);
98 tcg_gen_addi_ptr(a1, cpu_env, aofs);
100 fn(a0, a1, desc);
102 tcg_temp_free_ptr(a0);
103 tcg_temp_free_ptr(a1);
104 tcg_temp_free_i32(desc);
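/* Usage sketch (illustrative; "foo" is a hypothetical helper): a front end
 * whose out-of-line helper is declared as
 *     DEF_HELPER_FLAGS_3(foo, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 * could expand it over two 16-byte vector registers with
 *     tcg_gen_gvec_2_ool(dofs, aofs, 16, 16, 0, gen_helper_foo);
 * where dofs/aofs are the byte offsets of those registers within
 * CPUArchState, i.e. relative to cpu_env.  */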
107 /* Generate a call to a gvec-style helper with two vector operands
108 and one scalar operand. */
109 void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
110 uint32_t oprsz, uint32_t maxsz, int32_t data,
111 gen_helper_gvec_2i *fn)
113 TCGv_ptr a0, a1;
114 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
116 a0 = tcg_temp_new_ptr();
117 a1 = tcg_temp_new_ptr();
119 tcg_gen_addi_ptr(a0, cpu_env, dofs);
120 tcg_gen_addi_ptr(a1, cpu_env, aofs);
122 fn(a0, a1, c, desc);
124 tcg_temp_free_ptr(a0);
125 tcg_temp_free_ptr(a1);
126 tcg_temp_free_i32(desc);
129 /* Generate a call to a gvec-style helper with three vector operands. */
130 void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
131 uint32_t oprsz, uint32_t maxsz, int32_t data,
132 gen_helper_gvec_3 *fn)
134 TCGv_ptr a0, a1, a2;
135 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
137 a0 = tcg_temp_new_ptr();
138 a1 = tcg_temp_new_ptr();
139 a2 = tcg_temp_new_ptr();
141 tcg_gen_addi_ptr(a0, cpu_env, dofs);
142 tcg_gen_addi_ptr(a1, cpu_env, aofs);
143 tcg_gen_addi_ptr(a2, cpu_env, bofs);
145 fn(a0, a1, a2, desc);
147 tcg_temp_free_ptr(a0);
148 tcg_temp_free_ptr(a1);
149 tcg_temp_free_ptr(a2);
150 tcg_temp_free_i32(desc);
153 /* Generate a call to a gvec-style helper with four vector operands. */
154 void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
155 uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
156 int32_t data, gen_helper_gvec_4 *fn)
158 TCGv_ptr a0, a1, a2, a3;
159 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
161 a0 = tcg_temp_new_ptr();
162 a1 = tcg_temp_new_ptr();
163 a2 = tcg_temp_new_ptr();
164 a3 = tcg_temp_new_ptr();
166 tcg_gen_addi_ptr(a0, cpu_env, dofs);
167 tcg_gen_addi_ptr(a1, cpu_env, aofs);
168 tcg_gen_addi_ptr(a2, cpu_env, bofs);
169 tcg_gen_addi_ptr(a3, cpu_env, cofs);
171 fn(a0, a1, a2, a3, desc);
173 tcg_temp_free_ptr(a0);
174 tcg_temp_free_ptr(a1);
175 tcg_temp_free_ptr(a2);
176 tcg_temp_free_ptr(a3);
177 tcg_temp_free_i32(desc);
180 /* Generate a call to a gvec-style helper with five vector operands. */
181 void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
182 uint32_t cofs, uint32_t xofs, uint32_t oprsz,
183 uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
185 TCGv_ptr a0, a1, a2, a3, a4;
186 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
188 a0 = tcg_temp_new_ptr();
189 a1 = tcg_temp_new_ptr();
190 a2 = tcg_temp_new_ptr();
191 a3 = tcg_temp_new_ptr();
192 a4 = tcg_temp_new_ptr();
194 tcg_gen_addi_ptr(a0, cpu_env, dofs);
195 tcg_gen_addi_ptr(a1, cpu_env, aofs);
196 tcg_gen_addi_ptr(a2, cpu_env, bofs);
197 tcg_gen_addi_ptr(a3, cpu_env, cofs);
198 tcg_gen_addi_ptr(a4, cpu_env, xofs);
200 fn(a0, a1, a2, a3, a4, desc);
202 tcg_temp_free_ptr(a0);
203 tcg_temp_free_ptr(a1);
204 tcg_temp_free_ptr(a2);
205 tcg_temp_free_ptr(a3);
206 tcg_temp_free_ptr(a4);
207 tcg_temp_free_i32(desc);
210 /* Generate a call to a gvec-style helper with two vector operands
211 and an extra pointer operand. */
212 void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
213 TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
214 int32_t data, gen_helper_gvec_2_ptr *fn)
216 TCGv_ptr a0, a1;
217 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
219 a0 = tcg_temp_new_ptr();
220 a1 = tcg_temp_new_ptr();
222 tcg_gen_addi_ptr(a0, cpu_env, dofs);
223 tcg_gen_addi_ptr(a1, cpu_env, aofs);
225 fn(a0, a1, ptr, desc);
227 tcg_temp_free_ptr(a0);
228 tcg_temp_free_ptr(a1);
229 tcg_temp_free_i32(desc);
232 /* Generate a call to a gvec-style helper with three vector operands
233 and an extra pointer operand. */
234 void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
235 TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
236 int32_t data, gen_helper_gvec_3_ptr *fn)
238 TCGv_ptr a0, a1, a2;
239 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
241 a0 = tcg_temp_new_ptr();
242 a1 = tcg_temp_new_ptr();
243 a2 = tcg_temp_new_ptr();
245 tcg_gen_addi_ptr(a0, cpu_env, dofs);
246 tcg_gen_addi_ptr(a1, cpu_env, aofs);
247 tcg_gen_addi_ptr(a2, cpu_env, bofs);
249 fn(a0, a1, a2, ptr, desc);
251 tcg_temp_free_ptr(a0);
252 tcg_temp_free_ptr(a1);
253 tcg_temp_free_ptr(a2);
254 tcg_temp_free_i32(desc);
257 /* Generate a call to a gvec-style helper with four vector operands
258 and an extra pointer operand. */
259 void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
260 uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
261 uint32_t maxsz, int32_t data,
262 gen_helper_gvec_4_ptr *fn)
264 TCGv_ptr a0, a1, a2, a3;
265 TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
267 a0 = tcg_temp_new_ptr();
268 a1 = tcg_temp_new_ptr();
269 a2 = tcg_temp_new_ptr();
270 a3 = tcg_temp_new_ptr();
272 tcg_gen_addi_ptr(a0, cpu_env, dofs);
273 tcg_gen_addi_ptr(a1, cpu_env, aofs);
274 tcg_gen_addi_ptr(a2, cpu_env, bofs);
275 tcg_gen_addi_ptr(a3, cpu_env, cofs);
277 fn(a0, a1, a2, a3, ptr, desc);
279 tcg_temp_free_ptr(a0);
280 tcg_temp_free_ptr(a1);
281 tcg_temp_free_ptr(a2);
282 tcg_temp_free_ptr(a3);
283 tcg_temp_free_i32(desc);
286 /* Return true if we want to implement something of OPRSZ bytes
287 in units of LNSZ. This limits the expansion of inline code. */
288 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
290 if (oprsz % lnsz == 0) {
291 uint32_t lnct = oprsz / lnsz;
292 return lnct >= 1 && lnct <= MAX_UNROLL;
294 return false;
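/* Example (illustrative): oprsz == 32 with 8-byte lanes gives 4 iterations,
 * which is within MAX_UNROLL, so the operation is expanded inline;
 * oprsz == 64 with the same lane size would need 8 iterations and is
 * left to an out-of-line helper (or to wider vector lanes).  */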
297 static void expand_clr(uint32_t dofs, uint32_t maxsz);
299 /* Duplicate C as per VECE. */
300 uint64_t (dup_const)(unsigned vece, uint64_t c)
302 switch (vece) {
303 case MO_8:
304 return 0x0101010101010101ull * (uint8_t)c;
305 case MO_16:
306 return 0x0001000100010001ull * (uint16_t)c;
307 case MO_32:
308 return 0x0000000100000001ull * (uint32_t)c;
309 case MO_64:
310 return c;
311 default:
312 g_assert_not_reached();
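/* For example, dup_const(MO_8, 0x7f) yields 0x7f7f7f7f7f7f7f7f and
 * dup_const(MO_16, 0x1234) yields 0x1234123412341234.  The parentheses
 * around the function name above follow the usual idiom for suppressing
 * expansion of a constant-folding macro of the same name provided by the
 * TCG headers.  */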
316 /* Duplicate IN into OUT as per VECE. */
317 static void gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in)
319 switch (vece) {
320 case MO_8:
321 tcg_gen_ext8u_i32(out, in);
322 tcg_gen_muli_i32(out, out, 0x01010101);
323 break;
324 case MO_16:
325 tcg_gen_deposit_i32(out, in, in, 16, 16);
326 break;
327 case MO_32:
328 tcg_gen_mov_i32(out, in);
329 break;
330 default:
331 g_assert_not_reached();
335 static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
337 switch (vece) {
338 case MO_8:
339 tcg_gen_ext8u_i64(out, in);
340 tcg_gen_muli_i64(out, out, 0x0101010101010101ull);
341 break;
342 case MO_16:
343 tcg_gen_ext16u_i64(out, in);
344 tcg_gen_muli_i64(out, out, 0x0001000100010001ull);
345 break;
346 case MO_32:
347 tcg_gen_deposit_i64(out, in, in, 32, 32);
348 break;
349 case MO_64:
350 tcg_gen_mov_i64(out, in);
351 break;
352 default:
353 g_assert_not_reached();
357 /* Select a supported vector type for implementing an operation on SIZE
358 * bytes. If OP is 0, assume that the real operation to be performed is
359 * required by all backends. Otherwise, make sure that OP can be performed
360 * on elements of size VECE in the selected type. Do not select V64 if
361 * PREFER_I64 is true. Return 0 if no vector type is selected.
363 static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size,
364 bool prefer_i64)
366 if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
367 if (op == 0) {
368 return TCG_TYPE_V256;
370 /* Recall that ARM SVE allows vector sizes that are not a
371 * power of 2, but always a multiple of 16. The intent is
372 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
373 * It is hard to imagine a case in which v256 is supported
374 * but v128 is not, but check anyway.
376 if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece)
377 && (size % 32 == 0
378 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
379 return TCG_TYPE_V256;
382 if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
383 && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
384 return TCG_TYPE_V128;
386 if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
387 && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) {
388 return TCG_TYPE_V64;
390 return 0;
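/* Concretely (illustrative): on a host providing both V256 and V128,
 * choose_vector_type(INDEX_op_add_vec, MO_8, 80, false) only returns
 * TCG_TYPE_V256 if add_vec is also supported on V128, because an 80-byte
 * operation is emitted as 2x32 bytes plus a 16-byte tail.  */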
393 /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
394 * Only one of IN_32 or IN_64 may be set;
395 * IN_C is used if IN_32 and IN_64 are unset.
397 static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
398 uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64,
399 uint64_t in_c)
401 TCGType type;
402 TCGv_i64 t_64;
403 TCGv_i32 t_32, t_desc;
404 TCGv_ptr t_ptr;
405 uint32_t i;
407 assert(vece <= (in_32 ? MO_32 : MO_64));
408 assert(in_32 == NULL || in_64 == NULL);
410 /* If we're storing 0, expand oprsz to maxsz. */
411 if (in_32 == NULL && in_64 == NULL) {
412 in_c = dup_const(vece, in_c);
413 if (in_c == 0) {
414 oprsz = maxsz;
418 /* Implement inline with a vector type, if possible.
419 * Prefer integer when 64-bit host and no variable dup.
421 type = choose_vector_type(0, vece, oprsz,
422 (TCG_TARGET_REG_BITS == 64 && in_32 == NULL
423 && (in_64 == NULL || vece == MO_64)));
424 if (type != 0) {
425 TCGv_vec t_vec = tcg_temp_new_vec(type);
427 if (in_32) {
428 tcg_gen_dup_i32_vec(vece, t_vec, in_32);
429 } else if (in_64) {
430 tcg_gen_dup_i64_vec(vece, t_vec, in_64);
431 } else {
432 switch (vece) {
433 case MO_8:
434 tcg_gen_dup8i_vec(t_vec, in_c);
435 break;
436 case MO_16:
437 tcg_gen_dup16i_vec(t_vec, in_c);
438 break;
439 case MO_32:
440 tcg_gen_dup32i_vec(t_vec, in_c);
441 break;
442 default:
443 tcg_gen_dup64i_vec(t_vec, in_c);
444 break;
448 i = 0;
449 switch (type) {
450 case TCG_TYPE_V256:
451 /* Recall that ARM SVE allows vector sizes that are not a
452 * power of 2, but always a multiple of 16. The intent is
453 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
455 for (; i + 32 <= oprsz; i += 32) {
456 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
458 /* fallthru */
459 case TCG_TYPE_V128:
460 for (; i + 16 <= oprsz; i += 16) {
461 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
463 break;
464 case TCG_TYPE_V64:
465 for (; i < oprsz; i += 8) {
466 tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
468 break;
469 default:
470 g_assert_not_reached();
473 tcg_temp_free_vec(t_vec);
474 goto done;
477 /* Otherwise, inline with an integer type, unless "large". */
478 if (check_size_impl(oprsz, TCG_TARGET_REG_BITS / 8)) {
479 t_64 = NULL;
480 t_32 = NULL;
482 if (in_32) {
483 /* We are given a 32-bit variable input. For a 64-bit host,
484 use a 64-bit operation unless the 32-bit operation would
485 be simple enough. */
486 if (TCG_TARGET_REG_BITS == 64
487 && (vece != MO_32 || !check_size_impl(oprsz, 4))) {
488 t_64 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(t_64, in_32);
490 gen_dup_i64(vece, t_64, t_64);
491 } else {
492 t_32 = tcg_temp_new_i32();
493 gen_dup_i32(vece, t_32, in_32);
495 } else if (in_64) {
496 /* We are given a 64-bit variable input. */
497 t_64 = tcg_temp_new_i64();
498 gen_dup_i64(vece, t_64, in_64);
499 } else {
500 /* We are given a constant input. */
501 /* For 64-bit hosts, use 64-bit constants for "simple" constants
502 or when we'd need too many 32-bit stores, or when a 64-bit
503 constant is really required. */
504 if (vece == MO_64
505 || (TCG_TARGET_REG_BITS == 64
506 && (in_c == 0 || in_c == -1
507 || !check_size_impl(oprsz, 4)))) {
508 t_64 = tcg_const_i64(in_c);
509 } else {
510 t_32 = tcg_const_i32(in_c);
514 /* Implement inline if we picked an implementation size above. */
515 if (t_32) {
516 for (i = 0; i < oprsz; i += 4) {
517 tcg_gen_st_i32(t_32, cpu_env, dofs + i);
519 tcg_temp_free_i32(t_32);
520 goto done;
522 if (t_64) {
523 for (i = 0; i < oprsz; i += 8) {
524 tcg_gen_st_i64(t_64, cpu_env, dofs + i);
526 tcg_temp_free_i64(t_64);
527 goto done;
531 /* Otherwise implement out of line. */
532 t_ptr = tcg_temp_new_ptr();
533 tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
534 t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
536 if (vece == MO_64) {
537 if (in_64) {
538 gen_helper_gvec_dup64(t_ptr, t_desc, in_64);
539 } else {
540 t_64 = tcg_const_i64(in_c);
541 gen_helper_gvec_dup64(t_ptr, t_desc, t_64);
542 tcg_temp_free_i64(t_64);
544 } else {
545 typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
546 static dup_fn * const fns[3] = {
547 gen_helper_gvec_dup8,
548 gen_helper_gvec_dup16,
549 gen_helper_gvec_dup32
552 if (in_32) {
553 fns[vece](t_ptr, t_desc, in_32);
554 } else {
555 t_32 = tcg_temp_new_i32();
556 if (in_64) {
557 tcg_gen_extrl_i64_i32(t_32, in_64);
558 } else if (vece == MO_8) {
559 tcg_gen_movi_i32(t_32, in_c & 0xff);
560 } else if (vece == MO_16) {
561 tcg_gen_movi_i32(t_32, in_c & 0xffff);
562 } else {
563 tcg_gen_movi_i32(t_32, in_c);
565 fns[vece](t_ptr, t_desc, t_32);
566 tcg_temp_free_i32(t_32);
570 tcg_temp_free_ptr(t_ptr);
571 tcg_temp_free_i32(t_desc);
572 return;
574 done:
575 if (oprsz < maxsz) {
576 expand_clr(dofs + oprsz, maxsz - oprsz);
580 /* Likewise, but with zero. */
581 static void expand_clr(uint32_t dofs, uint32_t maxsz)
583 do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0);
586 /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
587 static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
588 void (*fni)(TCGv_i32, TCGv_i32))
590 TCGv_i32 t0 = tcg_temp_new_i32();
591 uint32_t i;
593 for (i = 0; i < oprsz; i += 4) {
594 tcg_gen_ld_i32(t0, cpu_env, aofs + i);
595 fni(t0, t0);
596 tcg_gen_st_i32(t0, cpu_env, dofs + i);
598 tcg_temp_free_i32(t0);
601 static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
602 int32_t c, bool load_dest,
603 void (*fni)(TCGv_i32, TCGv_i32, int32_t))
605 TCGv_i32 t0 = tcg_temp_new_i32();
606 TCGv_i32 t1 = tcg_temp_new_i32();
607 uint32_t i;
609 for (i = 0; i < oprsz; i += 4) {
610 tcg_gen_ld_i32(t0, cpu_env, aofs + i);
611 if (load_dest) {
612 tcg_gen_ld_i32(t1, cpu_env, dofs + i);
614 fni(t1, t0, c);
615 tcg_gen_st_i32(t1, cpu_env, dofs + i);
617 tcg_temp_free_i32(t0);
618 tcg_temp_free_i32(t1);
621 static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
622 TCGv_i32 c, bool scalar_first,
623 void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
625 TCGv_i32 t0 = tcg_temp_new_i32();
626 TCGv_i32 t1 = tcg_temp_new_i32();
627 uint32_t i;
629 for (i = 0; i < oprsz; i += 4) {
630 tcg_gen_ld_i32(t0, cpu_env, aofs + i);
631 if (scalar_first) {
632 fni(t1, c, t0);
633 } else {
634 fni(t1, t0, c);
636 tcg_gen_st_i32(t1, cpu_env, dofs + i);
638 tcg_temp_free_i32(t0);
639 tcg_temp_free_i32(t1);
642 /* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
643 static void expand_3_i32(uint32_t dofs, uint32_t aofs,
644 uint32_t bofs, uint32_t oprsz, bool load_dest,
645 void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
647 TCGv_i32 t0 = tcg_temp_new_i32();
648 TCGv_i32 t1 = tcg_temp_new_i32();
649 TCGv_i32 t2 = tcg_temp_new_i32();
650 uint32_t i;
652 for (i = 0; i < oprsz; i += 4) {
653 tcg_gen_ld_i32(t0, cpu_env, aofs + i);
654 tcg_gen_ld_i32(t1, cpu_env, bofs + i);
655 if (load_dest) {
656 tcg_gen_ld_i32(t2, cpu_env, dofs + i);
658 fni(t2, t0, t1);
659 tcg_gen_st_i32(t2, cpu_env, dofs + i);
661 tcg_temp_free_i32(t2);
662 tcg_temp_free_i32(t1);
663 tcg_temp_free_i32(t0);
666 /* Expand OPSZ bytes worth of four-operand operations using i32 elements. */
667 static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
668 uint32_t cofs, uint32_t oprsz,
669 void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
671 TCGv_i32 t0 = tcg_temp_new_i32();
672 TCGv_i32 t1 = tcg_temp_new_i32();
673 TCGv_i32 t2 = tcg_temp_new_i32();
674 TCGv_i32 t3 = tcg_temp_new_i32();
675 uint32_t i;
677 for (i = 0; i < oprsz; i += 4) {
678 tcg_gen_ld_i32(t1, cpu_env, aofs + i);
679 tcg_gen_ld_i32(t2, cpu_env, bofs + i);
680 tcg_gen_ld_i32(t3, cpu_env, cofs + i);
681 fni(t0, t1, t2, t3);
682 tcg_gen_st_i32(t0, cpu_env, dofs + i);
684 tcg_temp_free_i32(t3);
685 tcg_temp_free_i32(t2);
686 tcg_temp_free_i32(t1);
687 tcg_temp_free_i32(t0);
690 /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
691 static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
692 void (*fni)(TCGv_i64, TCGv_i64))
694 TCGv_i64 t0 = tcg_temp_new_i64();
695 uint32_t i;
697 for (i = 0; i < oprsz; i += 8) {
698 tcg_gen_ld_i64(t0, cpu_env, aofs + i);
699 fni(t0, t0);
700 tcg_gen_st_i64(t0, cpu_env, dofs + i);
702 tcg_temp_free_i64(t0);
705 static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
706 int64_t c, bool load_dest,
707 void (*fni)(TCGv_i64, TCGv_i64, int64_t))
709 TCGv_i64 t0 = tcg_temp_new_i64();
710 TCGv_i64 t1 = tcg_temp_new_i64();
711 uint32_t i;
713 for (i = 0; i < oprsz; i += 8) {
714 tcg_gen_ld_i64(t0, cpu_env, aofs + i);
715 if (load_dest) {
716 tcg_gen_ld_i64(t1, cpu_env, dofs + i);
718 fni(t1, t0, c);
719 tcg_gen_st_i64(t1, cpu_env, dofs + i);
721 tcg_temp_free_i64(t0);
722 tcg_temp_free_i64(t1);
725 static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
726 TCGv_i64 c, bool scalar_first,
727 void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
729 TCGv_i64 t0 = tcg_temp_new_i64();
730 TCGv_i64 t1 = tcg_temp_new_i64();
731 uint32_t i;
733 for (i = 0; i < oprsz; i += 8) {
734 tcg_gen_ld_i64(t0, cpu_env, aofs + i);
735 if (scalar_first) {
736 fni(t1, c, t0);
737 } else {
738 fni(t1, t0, c);
740 tcg_gen_st_i64(t1, cpu_env, dofs + i);
742 tcg_temp_free_i64(t0);
743 tcg_temp_free_i64(t1);
746 /* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
747 static void expand_3_i64(uint32_t dofs, uint32_t aofs,
748 uint32_t bofs, uint32_t oprsz, bool load_dest,
749 void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
751 TCGv_i64 t0 = tcg_temp_new_i64();
752 TCGv_i64 t1 = tcg_temp_new_i64();
753 TCGv_i64 t2 = tcg_temp_new_i64();
754 uint32_t i;
756 for (i = 0; i < oprsz; i += 8) {
757 tcg_gen_ld_i64(t0, cpu_env, aofs + i);
758 tcg_gen_ld_i64(t1, cpu_env, bofs + i);
759 if (load_dest) {
760 tcg_gen_ld_i64(t2, cpu_env, dofs + i);
762 fni(t2, t0, t1);
763 tcg_gen_st_i64(t2, cpu_env, dofs + i);
765 tcg_temp_free_i64(t2);
766 tcg_temp_free_i64(t1);
767 tcg_temp_free_i64(t0);
770 /* Expand OPSZ bytes worth of four-operand operations using i64 elements. */
771 static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
772 uint32_t cofs, uint32_t oprsz,
773 void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
775 TCGv_i64 t0 = tcg_temp_new_i64();
776 TCGv_i64 t1 = tcg_temp_new_i64();
777 TCGv_i64 t2 = tcg_temp_new_i64();
778 TCGv_i64 t3 = tcg_temp_new_i64();
779 uint32_t i;
781 for (i = 0; i < oprsz; i += 8) {
782 tcg_gen_ld_i64(t1, cpu_env, aofs + i);
783 tcg_gen_ld_i64(t2, cpu_env, bofs + i);
784 tcg_gen_ld_i64(t3, cpu_env, cofs + i);
785 fni(t0, t1, t2, t3);
786 tcg_gen_st_i64(t0, cpu_env, dofs + i);
788 tcg_temp_free_i64(t3);
789 tcg_temp_free_i64(t2);
790 tcg_temp_free_i64(t1);
791 tcg_temp_free_i64(t0);
794 /* Expand OPSZ bytes worth of two-operand operations using host vectors. */
795 static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
796 uint32_t oprsz, uint32_t tysz, TCGType type,
797 void (*fni)(unsigned, TCGv_vec, TCGv_vec))
799 TCGv_vec t0 = tcg_temp_new_vec(type);
800 uint32_t i;
802 for (i = 0; i < oprsz; i += tysz) {
803 tcg_gen_ld_vec(t0, cpu_env, aofs + i);
804 fni(vece, t0, t0);
805 tcg_gen_st_vec(t0, cpu_env, dofs + i);
807 tcg_temp_free_vec(t0);
810 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
811 using host vectors. */
812 static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
813 uint32_t oprsz, uint32_t tysz, TCGType type,
814 int64_t c, bool load_dest,
815 void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t))
817 TCGv_vec t0 = tcg_temp_new_vec(type);
818 TCGv_vec t1 = tcg_temp_new_vec(type);
819 uint32_t i;
821 for (i = 0; i < oprsz; i += tysz) {
822 tcg_gen_ld_vec(t0, cpu_env, aofs + i);
823 if (load_dest) {
824 tcg_gen_ld_vec(t1, cpu_env, dofs + i);
826 fni(vece, t1, t0, c);
827 tcg_gen_st_vec(t1, cpu_env, dofs + i);
829 tcg_temp_free_vec(t0);
830 tcg_temp_free_vec(t1);
833 static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
834 uint32_t oprsz, uint32_t tysz, TCGType type,
835 TCGv_vec c, bool scalar_first,
836 void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
838 TCGv_vec t0 = tcg_temp_new_vec(type);
839 TCGv_vec t1 = tcg_temp_new_vec(type);
840 uint32_t i;
842 for (i = 0; i < oprsz; i += tysz) {
843 tcg_gen_ld_vec(t0, cpu_env, aofs + i);
844 if (scalar_first) {
845 fni(vece, t1, c, t0);
846 } else {
847 fni(vece, t1, t0, c);
849 tcg_gen_st_vec(t1, cpu_env, dofs + i);
851 tcg_temp_free_vec(t0);
852 tcg_temp_free_vec(t1);
855 /* Expand OPSZ bytes worth of three-operand operations using host vectors. */
856 static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
857 uint32_t bofs, uint32_t oprsz,
858 uint32_t tysz, TCGType type, bool load_dest,
859 void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
861 TCGv_vec t0 = tcg_temp_new_vec(type);
862 TCGv_vec t1 = tcg_temp_new_vec(type);
863 TCGv_vec t2 = tcg_temp_new_vec(type);
864 uint32_t i;
866 for (i = 0; i < oprsz; i += tysz) {
867 tcg_gen_ld_vec(t0, cpu_env, aofs + i);
868 tcg_gen_ld_vec(t1, cpu_env, bofs + i);
869 if (load_dest) {
870 tcg_gen_ld_vec(t2, cpu_env, dofs + i);
872 fni(vece, t2, t0, t1);
873 tcg_gen_st_vec(t2, cpu_env, dofs + i);
875 tcg_temp_free_vec(t2);
876 tcg_temp_free_vec(t1);
877 tcg_temp_free_vec(t0);
880 /* Expand OPSZ bytes worth of four-operand operations using host vectors. */
881 static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
882 uint32_t bofs, uint32_t cofs, uint32_t oprsz,
883 uint32_t tysz, TCGType type,
884 void (*fni)(unsigned, TCGv_vec, TCGv_vec,
885 TCGv_vec, TCGv_vec))
887 TCGv_vec t0 = tcg_temp_new_vec(type);
888 TCGv_vec t1 = tcg_temp_new_vec(type);
889 TCGv_vec t2 = tcg_temp_new_vec(type);
890 TCGv_vec t3 = tcg_temp_new_vec(type);
891 uint32_t i;
893 for (i = 0; i < oprsz; i += tysz) {
894 tcg_gen_ld_vec(t1, cpu_env, aofs + i);
895 tcg_gen_ld_vec(t2, cpu_env, bofs + i);
896 tcg_gen_ld_vec(t3, cpu_env, cofs + i);
897 fni(vece, t0, t1, t2, t3);
898 tcg_gen_st_vec(t0, cpu_env, dofs + i);
900 tcg_temp_free_vec(t3);
901 tcg_temp_free_vec(t2);
902 tcg_temp_free_vec(t1);
903 tcg_temp_free_vec(t0);
906 /* Expand a vector two-operand operation. */
907 void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
908 uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
910 TCGType type;
911 uint32_t some;
913 check_size_align(oprsz, maxsz, dofs | aofs);
914 check_overlap_2(dofs, aofs, maxsz);
916 type = 0;
917 if (g->fniv) {
918 type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
920 switch (type) {
921 case TCG_TYPE_V256:
922 /* Recall that ARM SVE allows vector sizes that are not a
923 * power of 2, but always a multiple of 16. The intent is
924 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
926 some = QEMU_ALIGN_DOWN(oprsz, 32);
927 expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
928 if (some == oprsz) {
929 break;
931 dofs += some;
932 aofs += some;
933 oprsz -= some;
934 maxsz -= some;
935 /* fallthru */
936 case TCG_TYPE_V128:
937 expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
938 break;
939 case TCG_TYPE_V64:
940 expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
941 break;
943 case 0:
944 if (g->fni8 && check_size_impl(oprsz, 8)) {
945 expand_2_i64(dofs, aofs, oprsz, g->fni8);
946 } else if (g->fni4 && check_size_impl(oprsz, 4)) {
947 expand_2_i32(dofs, aofs, oprsz, g->fni4);
948 } else {
949 assert(g->fno != NULL);
950 tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
951 return;
953 break;
955 default:
956 g_assert_not_reached();
959 if (oprsz < maxsz) {
960 expand_clr(dofs + oprsz, maxsz - oprsz);
964 /* Expand a vector operation with two vectors and an immediate. */
965 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
966 uint32_t maxsz, int64_t c, const GVecGen2i *g)
968 TCGType type;
969 uint32_t some;
971 check_size_align(oprsz, maxsz, dofs | aofs);
972 check_overlap_2(dofs, aofs, maxsz);
974 type = 0;
975 if (g->fniv) {
976 type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
978 switch (type) {
979 case TCG_TYPE_V256:
980 /* Recall that ARM SVE allows vector sizes that are not a
981 * power of 2, but always a multiple of 16. The intent is
982 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
984 some = QEMU_ALIGN_DOWN(oprsz, 32);
985 expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
986 c, g->load_dest, g->fniv);
987 if (some == oprsz) {
988 break;
990 dofs += some;
991 aofs += some;
992 oprsz -= some;
993 maxsz -= some;
994 /* fallthru */
995 case TCG_TYPE_V128:
996 expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
997 c, g->load_dest, g->fniv);
998 break;
999 case TCG_TYPE_V64:
1000 expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
1001 c, g->load_dest, g->fniv);
1002 break;
1004 case 0:
1005 if (g->fni8 && check_size_impl(oprsz, 8)) {
1006 expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
1007 } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1008 expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
1009 } else {
1010 if (g->fno) {
1011 tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
1012 } else {
1013 TCGv_i64 tcg_c = tcg_const_i64(c);
1014 tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz,
1015 maxsz, c, g->fnoi);
1016 tcg_temp_free_i64(tcg_c);
1018 return;
1020 break;
1022 default:
1023 g_assert_not_reached();
1026 if (oprsz < maxsz) {
1027 expand_clr(dofs + oprsz, maxsz - oprsz);
1031 /* Expand a vector operation with two vectors and a scalar. */
1032 void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
1033 uint32_t maxsz, TCGv_i64 c, const GVecGen2s *g)
1035 TCGType type;
1037 check_size_align(oprsz, maxsz, dofs | aofs);
1038 check_overlap_2(dofs, aofs, maxsz);
1040 type = 0;
1041 if (g->fniv) {
1042 type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
1044 if (type != 0) {
1045 TCGv_vec t_vec = tcg_temp_new_vec(type);
1046 uint32_t some;
1048 tcg_gen_dup_i64_vec(g->vece, t_vec, c);
1050 switch (type) {
1051 case TCG_TYPE_V256:
1052 /* Recall that ARM SVE allows vector sizes that are not a
1053 * power of 2, but always a multiple of 16. The intent is
1054 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1056 some = QEMU_ALIGN_DOWN(oprsz, 32);
1057 expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
1058 t_vec, g->scalar_first, g->fniv);
1059 if (some == oprsz) {
1060 break;
1062 dofs += some;
1063 aofs += some;
1064 oprsz -= some;
1065 maxsz -= some;
1066 /* fallthru */
1068 case TCG_TYPE_V128:
1069 expand_2s_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
1070 t_vec, g->scalar_first, g->fniv);
1071 break;
1073 case TCG_TYPE_V64:
1074 expand_2s_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
1075 t_vec, g->scalar_first, g->fniv);
1076 break;
1078 default:
1079 g_assert_not_reached();
1081 tcg_temp_free_vec(t_vec);
1082 } else if (g->fni8 && check_size_impl(oprsz, 8)) {
1083 TCGv_i64 t64 = tcg_temp_new_i64();
1085 gen_dup_i64(g->vece, t64, c);
1086 expand_2s_i64(dofs, aofs, oprsz, t64, g->scalar_first, g->fni8);
1087 tcg_temp_free_i64(t64);
1088 } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1089 TCGv_i32 t32 = tcg_temp_new_i32();
1091 tcg_gen_extrl_i64_i32(t32, c);
1092 gen_dup_i32(g->vece, t32, t32);
1093 expand_2s_i32(dofs, aofs, oprsz, t32, g->scalar_first, g->fni4);
1094 tcg_temp_free_i32(t32);
1095 } else {
1096 tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, 0, g->fno);
1097 return;
1100 if (oprsz < maxsz) {
1101 expand_clr(dofs + oprsz, maxsz - oprsz);
1105 /* Expand a vector three-operand operation. */
1106 void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
1107 uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
1109 TCGType type;
1110 uint32_t some;
1112 check_size_align(oprsz, maxsz, dofs | aofs | bofs);
1113 check_overlap_3(dofs, aofs, bofs, maxsz);
1115 type = 0;
1116 if (g->fniv) {
1117 type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
1119 switch (type) {
1120 case TCG_TYPE_V256:
1121 /* Recall that ARM SVE allows vector sizes that are not a
1122 * power of 2, but always a multiple of 16. The intent is
1123 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1125 some = QEMU_ALIGN_DOWN(oprsz, 32);
1126 expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
1127 g->load_dest, g->fniv);
1128 if (some == oprsz) {
1129 break;
1131 dofs += some;
1132 aofs += some;
1133 bofs += some;
1134 oprsz -= some;
1135 maxsz -= some;
1136 /* fallthru */
1137 case TCG_TYPE_V128:
1138 expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
1139 g->load_dest, g->fniv);
1140 break;
1141 case TCG_TYPE_V64:
1142 expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
1143 g->load_dest, g->fniv);
1144 break;
1146 case 0:
1147 if (g->fni8 && check_size_impl(oprsz, 8)) {
1148 expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
1149 } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1150 expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
1151 } else {
1152 assert(g->fno != NULL);
1153 tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz,
1154 maxsz, g->data, g->fno);
1155 return;
1157 break;
1159 default:
1160 g_assert_not_reached();
1163 if (oprsz < maxsz) {
1164 expand_clr(dofs + oprsz, maxsz - oprsz);
1168 /* Expand a vector four-operand operation. */
1169 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
1170 uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
1172 TCGType type;
1173 uint32_t some;
1175 check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
1176 check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
1178 type = 0;
1179 if (g->fniv) {
1180 type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
1182 switch (type) {
1183 case TCG_TYPE_V256:
1184 /* Recall that ARM SVE allows vector sizes that are not a
1185 * power of 2, but always a multiple of 16. The intent is
1186 * that e.g. size == 80 would be expanded with 2x32 + 1x16.
1188 some = QEMU_ALIGN_DOWN(oprsz, 32);
1189 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some,
1190 32, TCG_TYPE_V256, g->fniv);
1191 if (some == oprsz) {
1192 break;
1194 dofs += some;
1195 aofs += some;
1196 bofs += some;
1197 cofs += some;
1198 oprsz -= some;
1199 maxsz -= some;
1200 /* fallthru */
1201 case TCG_TYPE_V128:
1202 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
1203 16, TCG_TYPE_V128, g->fniv);
1204 break;
1205 case TCG_TYPE_V64:
1206 expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
1207 8, TCG_TYPE_V64, g->fniv);
1208 break;
1210 case 0:
1211 if (g->fni8 && check_size_impl(oprsz, 8)) {
1212 expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8);
1213 } else if (g->fni4 && check_size_impl(oprsz, 4)) {
1214 expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4);
1215 } else {
1216 assert(g->fno != NULL);
1217 tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
1218 oprsz, maxsz, g->data, g->fno);
1219 return;
1221 break;
1223 default:
1224 g_assert_not_reached();
1227 if (oprsz < maxsz) {
1228 expand_clr(dofs + oprsz, maxsz - oprsz);
1233 * Expand specific vector operations.
1236 static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
1238 tcg_gen_mov_vec(a, b);
1241 void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
1242 uint32_t oprsz, uint32_t maxsz)
1244 static const GVecGen2 g = {
1245 .fni8 = tcg_gen_mov_i64,
1246 .fniv = vec_mov2,
1247 .fno = gen_helper_gvec_mov,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1250 if (dofs != aofs) {
1251 tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
1252 } else {
1253 check_size_align(oprsz, maxsz, dofs);
1254 if (oprsz < maxsz) {
1255 expand_clr(dofs + oprsz, maxsz - oprsz);
1260 void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
1261 uint32_t maxsz, TCGv_i32 in)
1263 check_size_align(oprsz, maxsz, dofs);
1264 tcg_debug_assert(vece <= MO_32);
1265 do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
1268 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
1269 uint32_t maxsz, TCGv_i64 in)
1271 check_size_align(oprsz, maxsz, dofs);
1272 tcg_debug_assert(vece <= MO_64);
1273 do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
1276 void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
1277 uint32_t oprsz, uint32_t maxsz)
1279 if (vece <= MO_32) {
1280 TCGv_i32 in = tcg_temp_new_i32();
1281 switch (vece) {
1282 case MO_8:
1283 tcg_gen_ld8u_i32(in, cpu_env, aofs);
1284 break;
1285 case MO_16:
1286 tcg_gen_ld16u_i32(in, cpu_env, aofs);
1287 break;
1288 case MO_32:
1289 tcg_gen_ld_i32(in, cpu_env, aofs);
1290 break;
1292 tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, in);
1293 tcg_temp_free_i32(in);
1294 } else if (vece == MO_64) {
1295 TCGv_i64 in = tcg_temp_new_i64();
1296 tcg_gen_ld_i64(in, cpu_env, aofs);
1297 tcg_gen_gvec_dup_i64(MO_64, dofs, oprsz, maxsz, in);
1298 tcg_temp_free_i64(in);
1299 } else {
1300 /* 128-bit duplicate. */
1301 /* ??? Dup to 256-bit vector. */
1302 int i;
1304 tcg_debug_assert(vece == 4);
1305 tcg_debug_assert(oprsz >= 16);
1306 if (TCG_TARGET_HAS_v128) {
1307 TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V128);
1309 tcg_gen_ld_vec(in, cpu_env, aofs);
1310 for (i = 0; i < oprsz; i += 16) {
1311 tcg_gen_st_vec(in, cpu_env, dofs + i);
1313 tcg_temp_free_vec(in);
1314 } else {
1315 TCGv_i64 in0 = tcg_temp_new_i64();
1316 TCGv_i64 in1 = tcg_temp_new_i64();
1318 tcg_gen_ld_i64(in0, cpu_env, aofs);
1319 tcg_gen_ld_i64(in1, cpu_env, aofs + 8);
1320 for (i = 0; i < oprsz; i += 16) {
1321 tcg_gen_st_i64(in0, cpu_env, dofs + i);
1322 tcg_gen_st_i64(in1, cpu_env, dofs + i + 8);
1324 tcg_temp_free_i64(in0);
1325 tcg_temp_free_i64(in1);
1330 void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
1331 uint32_t maxsz, uint64_t x)
1333 check_size_align(oprsz, maxsz, dofs);
1334 do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
1337 void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
1338 uint32_t maxsz, uint32_t x)
1340 check_size_align(oprsz, maxsz, dofs);
1341 do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
1344 void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
1345 uint32_t maxsz, uint16_t x)
1347 check_size_align(oprsz, maxsz, dofs);
1348 do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
1351 void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
1352 uint32_t maxsz, uint8_t x)
1354 check_size_align(oprsz, maxsz, dofs);
1355 do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
1358 void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
1359 uint32_t oprsz, uint32_t maxsz)
1361 static const GVecGen2 g = {
1362 .fni8 = tcg_gen_not_i64,
1363 .fniv = tcg_gen_not_vec,
1364 .fno = gen_helper_gvec_not,
1365 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1367 tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
1370 /* Perform a vector addition using normal addition and a mask. The mask
1371 should be the sign bit of each lane. This 6-operation form is more
1372 efficient than separate additions when there are 4 or more lanes in
1373 the 64-bit operation. */
1374 static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
1376 TCGv_i64 t1 = tcg_temp_new_i64();
1377 TCGv_i64 t2 = tcg_temp_new_i64();
1378 TCGv_i64 t3 = tcg_temp_new_i64();
1380 tcg_gen_andc_i64(t1, a, m);
1381 tcg_gen_andc_i64(t2, b, m);
1382 tcg_gen_xor_i64(t3, a, b);
1383 tcg_gen_add_i64(d, t1, t2);
1384 tcg_gen_and_i64(t3, t3, m);
1385 tcg_gen_xor_i64(d, d, t3);
1387 tcg_temp_free_i64(t1);
1388 tcg_temp_free_i64(t2);
1389 tcg_temp_free_i64(t3);
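/* Worked example (illustrative), for adjacent MO_8 lanes 0xff and 0x01:
 * clearing the sign bits first means the per-lane sums cannot carry into
 * the neighbouring lane, and xor-ing back (a ^ b) & m restores the
 * correct value of each sign bit, since addition is xor at that position
 * once the incoming carry is already present in d.  The two lanes end up
 * as 0x00/0x00 instead of leaking a carry as a plain 64-bit add would.  */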
1392 void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1394 TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1395 gen_addv_mask(d, a, b, m);
1396 tcg_temp_free_i64(m);
1399 void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1401 TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1402 gen_addv_mask(d, a, b, m);
1403 tcg_temp_free_i64(m);
1406 void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1408 TCGv_i64 t1 = tcg_temp_new_i64();
1409 TCGv_i64 t2 = tcg_temp_new_i64();
1411 tcg_gen_andi_i64(t1, a, ~0xffffffffull);
1412 tcg_gen_add_i64(t2, a, b);
1413 tcg_gen_add_i64(t1, t1, b);
1414 tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1416 tcg_temp_free_i64(t1);
1417 tcg_temp_free_i64(t2);
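/* Illustrative note: t2 holds the correct low 32-bit lane of a + b, while
 * t1 adds b to a with a's low lane cleared, so its high 32 bits are
 * a_hi + b_hi without the carry out of the low lane; the deposit then
 * stitches the two independent lanes together.  */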
1420 void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
1421 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1423 static const GVecGen3 g[4] = {
1424 { .fni8 = tcg_gen_vec_add8_i64,
1425 .fniv = tcg_gen_add_vec,
1426 .fno = gen_helper_gvec_add8,
1427 .opc = INDEX_op_add_vec,
1428 .vece = MO_8 },
1429 { .fni8 = tcg_gen_vec_add16_i64,
1430 .fniv = tcg_gen_add_vec,
1431 .fno = gen_helper_gvec_add16,
1432 .opc = INDEX_op_add_vec,
1433 .vece = MO_16 },
1434 { .fni4 = tcg_gen_add_i32,
1435 .fniv = tcg_gen_add_vec,
1436 .fno = gen_helper_gvec_add32,
1437 .opc = INDEX_op_add_vec,
1438 .vece = MO_32 },
1439 { .fni8 = tcg_gen_add_i64,
1440 .fniv = tcg_gen_add_vec,
1441 .fno = gen_helper_gvec_add64,
1442 .opc = INDEX_op_add_vec,
1443 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1444 .vece = MO_64 },
1447 tcg_debug_assert(vece <= MO_64);
1448 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1451 void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
1452 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1454 static const GVecGen2s g[4] = {
1455 { .fni8 = tcg_gen_vec_add8_i64,
1456 .fniv = tcg_gen_add_vec,
1457 .fno = gen_helper_gvec_adds8,
1458 .opc = INDEX_op_add_vec,
1459 .vece = MO_8 },
1460 { .fni8 = tcg_gen_vec_add16_i64,
1461 .fniv = tcg_gen_add_vec,
1462 .fno = gen_helper_gvec_adds16,
1463 .opc = INDEX_op_add_vec,
1464 .vece = MO_16 },
1465 { .fni4 = tcg_gen_add_i32,
1466 .fniv = tcg_gen_add_vec,
1467 .fno = gen_helper_gvec_adds32,
1468 .opc = INDEX_op_add_vec,
1469 .vece = MO_32 },
1470 { .fni8 = tcg_gen_add_i64,
1471 .fniv = tcg_gen_add_vec,
1472 .fno = gen_helper_gvec_adds64,
1473 .opc = INDEX_op_add_vec,
1474 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1475 .vece = MO_64 },
1478 tcg_debug_assert(vece <= MO_64);
1479 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
1482 void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
1483 int64_t c, uint32_t oprsz, uint32_t maxsz)
1485 TCGv_i64 tmp = tcg_const_i64(c);
1486 tcg_gen_gvec_adds(vece, dofs, aofs, tmp, oprsz, maxsz);
1487 tcg_temp_free_i64(tmp);
1490 void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
1491 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1493 static const GVecGen2s g[4] = {
1494 { .fni8 = tcg_gen_vec_sub8_i64,
1495 .fniv = tcg_gen_sub_vec,
1496 .fno = gen_helper_gvec_subs8,
1497 .opc = INDEX_op_sub_vec,
1498 .vece = MO_8 },
1499 { .fni8 = tcg_gen_vec_sub16_i64,
1500 .fniv = tcg_gen_sub_vec,
1501 .fno = gen_helper_gvec_subs16,
1502 .opc = INDEX_op_sub_vec,
1503 .vece = MO_16 },
1504 { .fni4 = tcg_gen_sub_i32,
1505 .fniv = tcg_gen_sub_vec,
1506 .fno = gen_helper_gvec_subs32,
1507 .opc = INDEX_op_sub_vec,
1508 .vece = MO_32 },
1509 { .fni8 = tcg_gen_sub_i64,
1510 .fniv = tcg_gen_sub_vec,
1511 .fno = gen_helper_gvec_subs64,
1512 .opc = INDEX_op_sub_vec,
1513 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1514 .vece = MO_64 },
1517 tcg_debug_assert(vece <= MO_64);
1518 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
1521 /* Perform a vector subtraction using normal subtraction and a mask.
1522 Compare gen_addv_mask above. */
1523 static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
1525 TCGv_i64 t1 = tcg_temp_new_i64();
1526 TCGv_i64 t2 = tcg_temp_new_i64();
1527 TCGv_i64 t3 = tcg_temp_new_i64();
1529 tcg_gen_or_i64(t1, a, m);
1530 tcg_gen_andc_i64(t2, b, m);
1531 tcg_gen_eqv_i64(t3, a, b);
1532 tcg_gen_sub_i64(d, t1, t2);
1533 tcg_gen_and_i64(t3, t3, m);
1534 tcg_gen_xor_i64(d, d, t3);
1536 tcg_temp_free_i64(t1);
1537 tcg_temp_free_i64(t2);
1538 tcg_temp_free_i64(t3);
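/* Illustrative note: forcing each lane's sign bit to 1 in the minuend and
 * to 0 in the subtrahend guarantees any borrow is absorbed within the
 * lane; xor-ing in ~(a ^ b) & m then repairs the sign-bit column, the
 * subtraction analogue of the correction in gen_addv_mask above.  */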
1541 void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1543 TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1544 gen_subv_mask(d, a, b, m);
1545 tcg_temp_free_i64(m);
1548 void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1550 TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1551 gen_subv_mask(d, a, b, m);
1552 tcg_temp_free_i64(m);
1555 void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1557 TCGv_i64 t1 = tcg_temp_new_i64();
1558 TCGv_i64 t2 = tcg_temp_new_i64();
1560 tcg_gen_andi_i64(t1, b, ~0xffffffffull);
1561 tcg_gen_sub_i64(t2, a, b);
1562 tcg_gen_sub_i64(t1, a, t1);
1563 tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1565 tcg_temp_free_i64(t1);
1566 tcg_temp_free_i64(t2);
1569 void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
1570 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1572 static const GVecGen3 g[4] = {
1573 { .fni8 = tcg_gen_vec_sub8_i64,
1574 .fniv = tcg_gen_sub_vec,
1575 .fno = gen_helper_gvec_sub8,
1576 .opc = INDEX_op_sub_vec,
1577 .vece = MO_8 },
1578 { .fni8 = tcg_gen_vec_sub16_i64,
1579 .fniv = tcg_gen_sub_vec,
1580 .fno = gen_helper_gvec_sub16,
1581 .opc = INDEX_op_sub_vec,
1582 .vece = MO_16 },
1583 { .fni4 = tcg_gen_sub_i32,
1584 .fniv = tcg_gen_sub_vec,
1585 .fno = gen_helper_gvec_sub32,
1586 .opc = INDEX_op_sub_vec,
1587 .vece = MO_32 },
1588 { .fni8 = tcg_gen_sub_i64,
1589 .fniv = tcg_gen_sub_vec,
1590 .fno = gen_helper_gvec_sub64,
1591 .opc = INDEX_op_sub_vec,
1592 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1593 .vece = MO_64 },
1596 tcg_debug_assert(vece <= MO_64);
1597 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1600 void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
1601 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1603 static const GVecGen3 g[4] = {
1604 { .fniv = tcg_gen_mul_vec,
1605 .fno = gen_helper_gvec_mul8,
1606 .opc = INDEX_op_mul_vec,
1607 .vece = MO_8 },
1608 { .fniv = tcg_gen_mul_vec,
1609 .fno = gen_helper_gvec_mul16,
1610 .opc = INDEX_op_mul_vec,
1611 .vece = MO_16 },
1612 { .fni4 = tcg_gen_mul_i32,
1613 .fniv = tcg_gen_mul_vec,
1614 .fno = gen_helper_gvec_mul32,
1615 .opc = INDEX_op_mul_vec,
1616 .vece = MO_32 },
1617 { .fni8 = tcg_gen_mul_i64,
1618 .fniv = tcg_gen_mul_vec,
1619 .fno = gen_helper_gvec_mul64,
1620 .opc = INDEX_op_mul_vec,
1621 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1622 .vece = MO_64 },
1625 tcg_debug_assert(vece <= MO_64);
1626 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1629 void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
1630 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1632 static const GVecGen2s g[4] = {
1633 { .fniv = tcg_gen_mul_vec,
1634 .fno = gen_helper_gvec_muls8,
1635 .opc = INDEX_op_mul_vec,
1636 .vece = MO_8 },
1637 { .fniv = tcg_gen_mul_vec,
1638 .fno = gen_helper_gvec_muls16,
1639 .opc = INDEX_op_mul_vec,
1640 .vece = MO_16 },
1641 { .fni4 = tcg_gen_mul_i32,
1642 .fniv = tcg_gen_mul_vec,
1643 .fno = gen_helper_gvec_muls32,
1644 .opc = INDEX_op_mul_vec,
1645 .vece = MO_32 },
1646 { .fni8 = tcg_gen_mul_i64,
1647 .fniv = tcg_gen_mul_vec,
1648 .fno = gen_helper_gvec_muls64,
1649 .opc = INDEX_op_mul_vec,
1650 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1651 .vece = MO_64 },
1654 tcg_debug_assert(vece <= MO_64);
1655 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
1658 void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
1659 int64_t c, uint32_t oprsz, uint32_t maxsz)
1661 TCGv_i64 tmp = tcg_const_i64(c);
1662 tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
1663 tcg_temp_free_i64(tmp);
1666 void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
1667 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1669 static const GVecGen3 g[4] = {
1670 { .fno = gen_helper_gvec_ssadd8, .vece = MO_8 },
1671 { .fno = gen_helper_gvec_ssadd16, .vece = MO_16 },
1672 { .fno = gen_helper_gvec_ssadd32, .vece = MO_32 },
1673 { .fno = gen_helper_gvec_ssadd64, .vece = MO_64 }
1675 tcg_debug_assert(vece <= MO_64);
1676 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1679 void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
1680 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1682 static const GVecGen3 g[4] = {
1683 { .fno = gen_helper_gvec_sssub8, .vece = MO_8 },
1684 { .fno = gen_helper_gvec_sssub16, .vece = MO_16 },
1685 { .fno = gen_helper_gvec_sssub32, .vece = MO_32 },
1686 { .fno = gen_helper_gvec_sssub64, .vece = MO_64 }
1688 tcg_debug_assert(vece <= MO_64);
1689 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1692 static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1694 TCGv_i32 max = tcg_const_i32(-1);
1695 tcg_gen_add_i32(d, a, b);
1696 tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
1697 tcg_temp_free_i32(max);
1700 static void tcg_gen_vec_usadd32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1702 TCGv_i64 max = tcg_const_i64(-1);
1703 tcg_gen_add_i64(d, a, b);
1704 tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
1705 tcg_temp_free_i64(max);
1708 void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
1709 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1711 static const GVecGen3 g[4] = {
1712 { .fno = gen_helper_gvec_usadd8, .vece = MO_8 },
1713 { .fno = gen_helper_gvec_usadd16, .vece = MO_16 },
1714 { .fni4 = tcg_gen_vec_usadd32_i32,
1715 .fno = gen_helper_gvec_usadd32,
1716 .vece = MO_32 },
1717 { .fni8 = tcg_gen_vec_usadd32_i64,
1718 .fno = gen_helper_gvec_usadd64,
1719 .vece = MO_64 }
1721 tcg_debug_assert(vece <= MO_64);
1722 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1725 static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
1727 TCGv_i32 min = tcg_const_i32(0);
1728 tcg_gen_sub_i32(d, a, b);
1729 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
1730 tcg_temp_free_i32(min);
1733 static void tcg_gen_vec_ussub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
1735 TCGv_i64 min = tcg_const_i64(0);
1736 tcg_gen_sub_i64(d, a, b);
1737 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
1738 tcg_temp_free_i64(min);
1741 void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
1742 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1744 static const GVecGen3 g[4] = {
1745 { .fno = gen_helper_gvec_ussub8, .vece = MO_8 },
1746 { .fno = gen_helper_gvec_ussub16, .vece = MO_16 },
1747 { .fni4 = tcg_gen_vec_ussub32_i32,
1748 .fno = gen_helper_gvec_ussub32,
1749 .vece = MO_32 },
1750 { .fni8 = tcg_gen_vec_ussub32_i64,
1751 .fno = gen_helper_gvec_ussub64,
1752 .vece = MO_64 }
1754 tcg_debug_assert(vece <= MO_64);
1755 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
1758 /* Perform a vector negation using normal negation and a mask.
1759 Compare gen_subv_mask above. */
1760 static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
1762 TCGv_i64 t2 = tcg_temp_new_i64();
1763 TCGv_i64 t3 = tcg_temp_new_i64();
1765 tcg_gen_andc_i64(t3, m, b);
1766 tcg_gen_andc_i64(t2, b, m);
1767 tcg_gen_sub_i64(d, m, t2);
1768 tcg_gen_xor_i64(d, d, t3);
1770 tcg_temp_free_i64(t2);
1771 tcg_temp_free_i64(t3);
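/* Illustrative note: this is gen_subv_mask with the minuend fixed at zero,
 * so the ~(0 ^ b) & m correction term simplifies to m & ~b (the andc
 * above) and only two temporaries are needed.  */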
1774 void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
1776 TCGv_i64 m = tcg_const_i64(dup_const(MO_8, 0x80));
1777 gen_negv_mask(d, b, m);
1778 tcg_temp_free_i64(m);
1781 void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
1783 TCGv_i64 m = tcg_const_i64(dup_const(MO_16, 0x8000));
1784 gen_negv_mask(d, b, m);
1785 tcg_temp_free_i64(m);
1788 void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
1790 TCGv_i64 t1 = tcg_temp_new_i64();
1791 TCGv_i64 t2 = tcg_temp_new_i64();
1793 tcg_gen_andi_i64(t1, b, ~0xffffffffull);
1794 tcg_gen_neg_i64(t2, b);
1795 tcg_gen_neg_i64(t1, t1);
1796 tcg_gen_deposit_i64(d, t1, t2, 0, 32);
1798 tcg_temp_free_i64(t1);
1799 tcg_temp_free_i64(t2);
1802 void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
1803 uint32_t oprsz, uint32_t maxsz)
1805 static const GVecGen2 g[4] = {
1806 { .fni8 = tcg_gen_vec_neg8_i64,
1807 .fniv = tcg_gen_neg_vec,
1808 .fno = gen_helper_gvec_neg8,
1809 .opc = INDEX_op_neg_vec,
1810 .vece = MO_8 },
1811 { .fni8 = tcg_gen_vec_neg16_i64,
1812 .fniv = tcg_gen_neg_vec,
1813 .fno = gen_helper_gvec_neg16,
1814 .opc = INDEX_op_neg_vec,
1815 .vece = MO_16 },
1816 { .fni4 = tcg_gen_neg_i32,
1817 .fniv = tcg_gen_neg_vec,
1818 .fno = gen_helper_gvec_neg32,
1819 .opc = INDEX_op_neg_vec,
1820 .vece = MO_32 },
1821 { .fni8 = tcg_gen_neg_i64,
1822 .fniv = tcg_gen_neg_vec,
1823 .fno = gen_helper_gvec_neg64,
1824 .opc = INDEX_op_neg_vec,
1825 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1826 .vece = MO_64 },
1829 tcg_debug_assert(vece <= MO_64);
1830 tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
1833 void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
1834 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1836 static const GVecGen3 g = {
1837 .fni8 = tcg_gen_and_i64,
1838 .fniv = tcg_gen_and_vec,
1839 .fno = gen_helper_gvec_and,
1840 .opc = INDEX_op_and_vec,
1841 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1843 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1846 void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
1847 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1849 static const GVecGen3 g = {
1850 .fni8 = tcg_gen_or_i64,
1851 .fniv = tcg_gen_or_vec,
1852 .fno = gen_helper_gvec_or,
1853 .opc = INDEX_op_or_vec,
1854 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1856 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1859 void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
1860 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1862 static const GVecGen3 g = {
1863 .fni8 = tcg_gen_xor_i64,
1864 .fniv = tcg_gen_xor_vec,
1865 .fno = gen_helper_gvec_xor,
1866 .opc = INDEX_op_xor_vec,
1867 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1869 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1872 void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
1873 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1875 static const GVecGen3 g = {
1876 .fni8 = tcg_gen_andc_i64,
1877 .fniv = tcg_gen_andc_vec,
1878 .fno = gen_helper_gvec_andc,
1879 .opc = INDEX_op_andc_vec,
1880 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1882 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1885 void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
1886 uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
1888 static const GVecGen3 g = {
1889 .fni8 = tcg_gen_orc_i64,
1890 .fniv = tcg_gen_orc_vec,
1891 .fno = gen_helper_gvec_orc,
1892 .opc = INDEX_op_orc_vec,
1893 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1895 tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
1898 static const GVecGen2s gop_ands = {
1899 .fni8 = tcg_gen_and_i64,
1900 .fniv = tcg_gen_and_vec,
1901 .fno = gen_helper_gvec_ands,
1902 .opc = INDEX_op_and_vec,
1903 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1904 .vece = MO_64
1907 void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
1908 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1910 TCGv_i64 tmp = tcg_temp_new_i64();
1911 gen_dup_i64(vece, tmp, c);
1912 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
1913 tcg_temp_free_i64(tmp);
1916 void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
1917 int64_t c, uint32_t oprsz, uint32_t maxsz)
1919 TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
1920 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
1921 tcg_temp_free_i64(tmp);
1924 static const GVecGen2s gop_xors = {
1925 .fni8 = tcg_gen_xor_i64,
1926 .fniv = tcg_gen_xor_vec,
1927 .fno = gen_helper_gvec_xors,
1928 .opc = INDEX_op_xor_vec,
1929 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1930 .vece = MO_64
1933 void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
1934 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1936 TCGv_i64 tmp = tcg_temp_new_i64();
1937 gen_dup_i64(vece, tmp, c);
1938 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
1939 tcg_temp_free_i64(tmp);
1942 void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
1943 int64_t c, uint32_t oprsz, uint32_t maxsz)
1945 TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
1946 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
1947 tcg_temp_free_i64(tmp);
1950 static const GVecGen2s gop_ors = {
1951 .fni8 = tcg_gen_or_i64,
1952 .fniv = tcg_gen_or_vec,
1953 .fno = gen_helper_gvec_ors,
1954 .opc = INDEX_op_or_vec,
1955 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1956 .vece = MO_64
1959 void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
1960 TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
1962 TCGv_i64 tmp = tcg_temp_new_i64();
1963 gen_dup_i64(vece, tmp, c);
1964 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
1965 tcg_temp_free_i64(tmp);
1968 void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
1969 int64_t c, uint32_t oprsz, uint32_t maxsz)
1971 TCGv_i64 tmp = tcg_const_i64(dup_const(vece, c));
1972 tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
1973 tcg_temp_free_i64(tmp);
1976 void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1978 uint64_t mask = dup_const(MO_8, 0xff << c);
1979 tcg_gen_shli_i64(d, a, c);
1980 tcg_gen_andi_i64(d, d, mask);
1983 void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
1985 uint64_t mask = dup_const(MO_16, 0xffff << c);
1986 tcg_gen_shli_i64(d, a, c);
1987 tcg_gen_andi_i64(d, d, mask);
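/* Illustrative example: for MO_8 and c == 1 the mask is
 * dup_const(MO_8, 0xfe), so after shifting the whole 64-bit word left by
 * one, any bit that crossed into the next byte lane is cleared and each
 * byte sees an independent shift.  */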

void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_shl8i_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl8i,
          .opc = INDEX_op_shli_vec,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shl16i_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl16i,
          .opc = INDEX_op_shli_vec,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shli_i32,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl32i,
          .opc = INDEX_op_shli_vec,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shli_i64,
          .fniv = tcg_gen_shli_vec,
          .fno = gen_helper_gvec_shl64i,
          .opc = INDEX_op_shli_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}
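
/*
 * Illustrative use (hypothetical values): tcg_gen_gvec_shli(MO_16, dofs,
 * aofs, 3, 16, 16) shifts each of the eight 16-bit lanes in a 16-byte
 * operand left by 3, with g[MO_16] selecting between the integer, vector
 * and out-of-line implementations.
 */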

void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_8, 0xff >> c);
    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t mask = dup_const(MO_16, 0xffff >> c);
    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(d, d, mask);
}

void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_shr8i_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr8i,
          .opc = INDEX_op_shri_vec,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shr16i_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr16i,
          .opc = INDEX_op_shri_vec,
          .vece = MO_16 },
        { .fni4 = tcg_gen_shri_i32,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr32i,
          .opc = INDEX_op_shri_vec,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shri_i64,
          .fniv = tcg_gen_shri_vec,
          .fno = gen_helper_gvec_shr64i,
          .opc = INDEX_op_shri_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
    uint64_t c_mask = dup_const(MO_8, 0xff >> c);
    TCGv_i64 s = tcg_temp_new_i64();

    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_or_i64(d, d, s);         /* include sign extension */
    tcg_temp_free_i64(s);
}
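
/*
 * Worked example for c == 3 and the byte 0x90 (-112): the logical shift
 * gives 0x12; the isolated sign bit 0x10 times (2 << 3) - 2 == 14 gives
 * 0xe0; 0x12 & 0x1f == 0x12, and 0x12 | 0xe0 == 0xf2 == -14, exactly the
 * arithmetic right shift of -112 by 3.
 */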

void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
{
    uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
    uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
    TCGv_i64 s = tcg_temp_new_i64();

    tcg_gen_shri_i64(d, a, c);
    tcg_gen_andi_i64(s, d, s_mask);  /* isolate (shifted) sign bit */
    tcg_gen_andi_i64(d, d, c_mask);  /* clear out bits above sign */
    tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
    tcg_gen_or_i64(d, d, s);         /* include sign extension */
    tcg_temp_free_i64(s);
}

void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
                       int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen2i g[4] = {
        { .fni8 = tcg_gen_vec_sar8i_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar8i,
          .opc = INDEX_op_sari_vec,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_sar16i_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar16i,
          .opc = INDEX_op_sari_vec,
          .vece = MO_16 },
        { .fni4 = tcg_gen_sari_i32,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar32i,
          .opc = INDEX_op_sari_vec,
          .vece = MO_32 },
        { .fni8 = tcg_gen_sari_i64,
          .fniv = tcg_gen_sari_vec,
          .fno = gen_helper_gvec_sar64i,
          .opc = INDEX_op_sari_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_debug_assert(shift >= 0 && shift < (8 << vece));
    if (shift == 0) {
        tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
    } else {
        tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
    }
}

/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                           uint32_t oprsz, TCGCond cond)
{
    TCGv_i32 t0 = tcg_temp_new_i32();
    TCGv_i32 t1 = tcg_temp_new_i32();
    uint32_t i;

    for (i = 0; i < oprsz; i += 4) {
        tcg_gen_ld_i32(t0, cpu_env, aofs + i);
        tcg_gen_ld_i32(t1, cpu_env, bofs + i);
        tcg_gen_setcond_i32(cond, t0, t0, t1);
        tcg_gen_neg_i32(t0, t0);
        tcg_gen_st_i32(t0, cpu_env, dofs + i);
    }
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t0);
}
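
/*
 * In the expand_cmp_i32/i64 expansions, setcond produces 0 or 1 per
 * element; the negation converts that into the all-zeros/all-ones result
 * that the gvec comparison contract requires.
 */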

static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
                           uint32_t oprsz, TCGCond cond)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    uint32_t i;

    for (i = 0; i < oprsz; i += 8) {
        tcg_gen_ld_i64(t0, cpu_env, aofs + i);
        tcg_gen_ld_i64(t1, cpu_env, bofs + i);
        tcg_gen_setcond_i64(cond, t0, t0, t1);
        tcg_gen_neg_i64(t0, t0);
        tcg_gen_st_i64(t0, cpu_env, dofs + i);
    }
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t0);
}

static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                           uint32_t bofs, uint32_t oprsz, uint32_t tysz,
                           TCGType type, TCGCond cond)
{
    TCGv_vec t0 = tcg_temp_new_vec(type);
    TCGv_vec t1 = tcg_temp_new_vec(type);
    uint32_t i;

    for (i = 0; i < oprsz; i += tysz) {
        tcg_gen_ld_vec(t0, cpu_env, aofs + i);
        tcg_gen_ld_vec(t1, cpu_env, bofs + i);
        tcg_gen_cmp_vec(cond, vece, t0, t0, t1);
        tcg_gen_st_vec(t0, cpu_env, dofs + i);
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t0);
}
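
/*
 * Unlike the integer expansions above, tcg_gen_cmp_vec already yields the
 * -1/0 per-element result, so no separate negation is needed here.
 */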

void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
                      uint32_t aofs, uint32_t bofs,
                      uint32_t oprsz, uint32_t maxsz)
{
    static gen_helper_gvec_3 * const eq_fn[4] = {
        gen_helper_gvec_eq8, gen_helper_gvec_eq16,
        gen_helper_gvec_eq32, gen_helper_gvec_eq64
    };
    static gen_helper_gvec_3 * const ne_fn[4] = {
        gen_helper_gvec_ne8, gen_helper_gvec_ne16,
        gen_helper_gvec_ne32, gen_helper_gvec_ne64
    };
    static gen_helper_gvec_3 * const lt_fn[4] = {
        gen_helper_gvec_lt8, gen_helper_gvec_lt16,
        gen_helper_gvec_lt32, gen_helper_gvec_lt64
    };
    static gen_helper_gvec_3 * const le_fn[4] = {
        gen_helper_gvec_le8, gen_helper_gvec_le16,
        gen_helper_gvec_le32, gen_helper_gvec_le64
    };
    static gen_helper_gvec_3 * const ltu_fn[4] = {
        gen_helper_gvec_ltu8, gen_helper_gvec_ltu16,
        gen_helper_gvec_ltu32, gen_helper_gvec_ltu64
    };
    static gen_helper_gvec_3 * const leu_fn[4] = {
        gen_helper_gvec_leu8, gen_helper_gvec_leu16,
        gen_helper_gvec_leu32, gen_helper_gvec_leu64
    };
    static gen_helper_gvec_3 * const * const fns[16] = {
        [TCG_COND_EQ] = eq_fn,
        [TCG_COND_NE] = ne_fn,
        [TCG_COND_LT] = lt_fn,
        [TCG_COND_LE] = le_fn,
        [TCG_COND_LTU] = ltu_fn,
        [TCG_COND_LEU] = leu_fn,
    };
    TCGType type;
    uint32_t some;

    check_size_align(oprsz, maxsz, dofs | aofs | bofs);
    check_overlap_3(dofs, aofs, bofs, maxsz);

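    /* Degenerate conditions need no comparison at all: NEVER fills the
     * destination with zeros and ALWAYS with all-ones bytes, via
     * -(cond == TCG_COND_ALWAYS) evaluating to 0 or -1.
     */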
    if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
        do_dup(MO_8, dofs, oprsz, maxsz,
               NULL, NULL, -(cond == TCG_COND_ALWAYS));
        return;
    }

    /* Implement inline with a vector type, if possible.
     * Prefer integer when 64-bit host and 64-bit comparison.
     */
    type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz,
                              TCG_TARGET_REG_BITS == 64 && vece == MO_64);
    switch (type) {
    case TCG_TYPE_V256:
        /* Recall that ARM SVE allows vector sizes that are not a
         * power of 2, but always a multiple of 16.  The intent is
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         */
        some = QEMU_ALIGN_DOWN(oprsz, 32);
        expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond);
        if (some == oprsz) {
            break;
        }
        dofs += some;
        aofs += some;
        bofs += some;
        oprsz -= some;
        maxsz -= some;
        /* fallthru */
    case TCG_TYPE_V128:
        expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond);
        break;
    case TCG_TYPE_V64:
        expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond);
        break;

    case 0:
        if (vece == MO_64 && check_size_impl(oprsz, 8)) {
            expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
        } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
            expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
        } else {
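            /* Only EQ, NE, LT, LE, LTU and LEU have out-of-line helpers.
             * For the remaining conditions (e.g. GT), swap the operands
             * and reverse the condition so that an existing helper
             * applies: GT(a, b) is LT(b, a).
             */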
            gen_helper_gvec_3 * const *fn = fns[cond];

            if (fn == NULL) {
                uint32_t tmp;
                tmp = aofs, aofs = bofs, bofs = tmp;
                cond = tcg_swap_cond(cond);
                fn = fns[cond];
                assert(fn != NULL);
            }
            tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
            return;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (oprsz < maxsz) {
        expand_clr(dofs + oprsz, maxsz - oprsz);