/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg-gvec-desc.h"

/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

#define DUP16(X)  { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X)   { X, X, X, X, X, X, X, X }
#define DUP4(X)   { X, X, X, X }
#define DUP2(X)   { X, X }
#else
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X)  X
#define DUP8(X)   X
#define DUP4(X)   X
#define DUP2(X)   X
#endif /* CONFIG_VECTOR16 */
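
/* Illustrative sketch, not part of the build: with CONFIG_VECTOR16 defined,
 * arithmetic on the vec types operates on all 16 bytes at once, e.g.
 *
 *     vec8 x, y;
 *     vec8 sum = x + y;              // one lane-wise 16-byte addition
 *     vec8 k = (vec8)DUP16(0x7f);    // broadcast a scalar into every lane
 *
 * Without CONFIG_VECTOR16 the same expressions fall back to plain scalar
 * arithmetic, processing one element per iteration of the loops below.
 */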

static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}
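
/* Worked example: a helper invoked with oprsz = 16 and maxsz = 32 computes
 * bytes [0, 16) of the destination, and clear_high() then zeroes bytes
 * [16, 32), leaving the unused tail of the register in a defined state.
 */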

void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec32 vecb = (vec32)DUP4(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec32 vecb = (vec32)DUP4(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec16 vecb = (vec16)DUP8(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec32 vecb = (vec32)DUP4(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = -*(vec8 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = -*(vec16 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = -*(vec32 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = -*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);

    memcpy(d, a, oprsz);
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        /* Duplicating zero: let clear_high zero the entire vector.  */
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
}

void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
}
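
/* The multiplies above replicate a narrow constant across a 32-bit word:
 * 0x01010101 * 0xab == 0xabababab and 0x00010001 * 0xabcd == 0xabcdabcd,
 * so the 8- and 16-bit dups reduce to the 32-bit one.
 */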

void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec64 vecb = (vec64)DUP2(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
#ifdef CONFIG_VECTOR16
# define DO_CMP0(X)  X
#else
# define DO_CMP0(X)  -(X)
#endif

#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    intptr_t oprsz = simd_oprsz(desc);                                     \
    intptr_t i;                                                            \
    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
    }                                                                      \
    clear_high(d, oprsz, desc);                                            \
}

#define DO_CMP2(SZ)                         \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==)       \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=)       \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <)       \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=)      \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <)       \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2
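
/* For example, DO_CMP2(8) expands to the six helpers gvec_eq8, gvec_ne8,
 * gvec_lt8, gvec_le8, gvec_ltu8 and gvec_leu8.  The signed comparisons use
 * the svec types and the unsigned ones the vec types, so the same C operator
 * yields the correct ordering in both cases.
 */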

void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
        int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
        if (r > INT8_MAX) {
            r = INT8_MAX;
        } else if (r < INT8_MIN) {
            r = INT8_MIN;
        }
        *(int8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
        int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
        if (r > INT16_MAX) {
            r = INT16_MAX;
        } else if (r < INT16_MIN) {
            r = INT16_MIN;
        }
        *(int16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t ai = *(int32_t *)(a + i);
        int32_t bi = *(int32_t *)(b + i);
        int32_t di = ai + bi;
        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
            /* Signed overflow.  */
            di = (di < 0 ? INT32_MAX : INT32_MIN);
        }
        *(int32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t ai = *(int64_t *)(a + i);
        int64_t bi = *(int64_t *)(b + i);
        int64_t di = ai + bi;
        if (((di ^ ai) &~ (ai ^ bi)) < 0) {
            /* Signed overflow.  */
            di = (di < 0 ? INT64_MAX : INT64_MIN);
        }
        *(int64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
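
/* The test ((di ^ ai) &~ (ai ^ bi)) < 0 above detects signed overflow on
 * addition without widening: overflow requires the operands to agree in
 * sign (ai ^ bi >= 0) while the result disagrees with them (di ^ ai < 0).
 * The subtraction helpers below use ((di ^ ai) & (ai ^ bi)) < 0 instead,
 * since subtraction can only overflow when the operands' signs differ.
 */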

void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
        if (r > INT8_MAX) {
            r = INT8_MAX;
        } else if (r < INT8_MIN) {
            r = INT8_MIN;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
        int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
        if (r > INT16_MAX) {
            r = INT16_MAX;
        } else if (r < INT16_MIN) {
            r = INT16_MIN;
        }
        *(int16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t ai = *(int32_t *)(a + i);
        int32_t bi = *(int32_t *)(b + i);
        int32_t di = ai - bi;
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow.  */
            di = (di < 0 ? INT32_MAX : INT32_MIN);
        }
        *(int32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t ai = *(int64_t *)(a + i);
        int64_t bi = *(int64_t *)(b + i);
        int64_t di = ai - bi;
        if (((di ^ ai) & (ai ^ bi)) < 0) {
            /* Signed overflow.  */
            di = (di < 0 ? INT64_MAX : INT64_MIN);
        }
        *(int64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
        if (r > UINT8_MAX) {
            r = UINT8_MAX;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
        if (r > UINT16_MAX) {
            r = UINT16_MAX;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai + bi;
        if (di < ai) {
            di = UINT32_MAX;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
        uint64_t di = ai + bi;
        if (di < ai) {
            di = UINT64_MAX;
        }
        *(uint64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
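
/* Unsigned saturation needs no widening either: for addition, wrap-around
 * is detected by di < ai (the sum came out smaller than an operand); for
 * the subtraction helpers below, a borrow is detected by ai < bi and the
 * result is clamped to zero.
 */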

void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint8_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
        if (r < 0) {
            r = 0;
        }
        *(uint16_t *)(d + i) = r;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai - bi;
        if (ai < bi) {
            di = 0;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        uint64_t ai = *(uint64_t *)(a + i);
        uint64_t bi = *(uint64_t *)(b + i);
        uint64_t di = ai - bi;
        if (ai < bi) {
            di = 0;
        }
        *(uint64_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
);