/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "tcg/tcg-gvec-desc.h"
/*
 * Zero the destination past the operation size: starting at byte offset
 * oprsz, store 64-bit zeroes while the offset is below simd_maxsz(desc).
 * Nothing is written when simd_maxsz(desc) <= oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);

    if (unlikely(oprsz < limit)) {
        intptr_t off = oprsz;

        while (off < limit) {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        }
    }
}
39 void HELPER(gvec_add8
)(void *d
, void *a
, void *b
, uint32_t desc
)
41 intptr_t oprsz
= simd_oprsz(desc
);
44 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
45 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) + *(uint8_t *)(b
+ i
);
47 clear_high(d
, oprsz
, desc
);
50 void HELPER(gvec_add16
)(void *d
, void *a
, void *b
, uint32_t desc
)
52 intptr_t oprsz
= simd_oprsz(desc
);
55 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
56 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) + *(uint16_t *)(b
+ i
);
58 clear_high(d
, oprsz
, desc
);
61 void HELPER(gvec_add32
)(void *d
, void *a
, void *b
, uint32_t desc
)
63 intptr_t oprsz
= simd_oprsz(desc
);
66 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
67 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) + *(uint32_t *)(b
+ i
);
69 clear_high(d
, oprsz
, desc
);
72 void HELPER(gvec_add64
)(void *d
, void *a
, void *b
, uint32_t desc
)
74 intptr_t oprsz
= simd_oprsz(desc
);
77 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
78 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) + *(uint64_t *)(b
+ i
);
80 clear_high(d
, oprsz
, desc
);
83 void HELPER(gvec_adds8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
85 intptr_t oprsz
= simd_oprsz(desc
);
88 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
89 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) + (uint8_t)b
;
91 clear_high(d
, oprsz
, desc
);
94 void HELPER(gvec_adds16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
96 intptr_t oprsz
= simd_oprsz(desc
);
99 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
100 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) + (uint16_t)b
;
102 clear_high(d
, oprsz
, desc
);
105 void HELPER(gvec_adds32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
107 intptr_t oprsz
= simd_oprsz(desc
);
110 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
111 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) + (uint32_t)b
;
113 clear_high(d
, oprsz
, desc
);
116 void HELPER(gvec_adds64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
118 intptr_t oprsz
= simd_oprsz(desc
);
121 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
122 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) + b
;
124 clear_high(d
, oprsz
, desc
);
127 void HELPER(gvec_sub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
129 intptr_t oprsz
= simd_oprsz(desc
);
132 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
133 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) - *(uint8_t *)(b
+ i
);
135 clear_high(d
, oprsz
, desc
);
138 void HELPER(gvec_sub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
140 intptr_t oprsz
= simd_oprsz(desc
);
143 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
144 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) - *(uint16_t *)(b
+ i
);
146 clear_high(d
, oprsz
, desc
);
149 void HELPER(gvec_sub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
151 intptr_t oprsz
= simd_oprsz(desc
);
154 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
155 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) - *(uint32_t *)(b
+ i
);
157 clear_high(d
, oprsz
, desc
);
160 void HELPER(gvec_sub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
162 intptr_t oprsz
= simd_oprsz(desc
);
165 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
166 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) - *(uint64_t *)(b
+ i
);
168 clear_high(d
, oprsz
, desc
);
171 void HELPER(gvec_subs8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
173 intptr_t oprsz
= simd_oprsz(desc
);
176 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
177 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) - (uint8_t)b
;
179 clear_high(d
, oprsz
, desc
);
182 void HELPER(gvec_subs16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
184 intptr_t oprsz
= simd_oprsz(desc
);
187 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
188 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) - (uint16_t)b
;
190 clear_high(d
, oprsz
, desc
);
193 void HELPER(gvec_subs32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
195 intptr_t oprsz
= simd_oprsz(desc
);
198 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
199 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) - (uint32_t)b
;
201 clear_high(d
, oprsz
, desc
);
204 void HELPER(gvec_subs64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
206 intptr_t oprsz
= simd_oprsz(desc
);
209 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
210 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) - b
;
212 clear_high(d
, oprsz
, desc
);
215 void HELPER(gvec_mul8
)(void *d
, void *a
, void *b
, uint32_t desc
)
217 intptr_t oprsz
= simd_oprsz(desc
);
220 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
221 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) * *(uint8_t *)(b
+ i
);
223 clear_high(d
, oprsz
, desc
);
226 void HELPER(gvec_mul16
)(void *d
, void *a
, void *b
, uint32_t desc
)
228 intptr_t oprsz
= simd_oprsz(desc
);
231 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
232 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) * *(uint16_t *)(b
+ i
);
234 clear_high(d
, oprsz
, desc
);
237 void HELPER(gvec_mul32
)(void *d
, void *a
, void *b
, uint32_t desc
)
239 intptr_t oprsz
= simd_oprsz(desc
);
242 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
243 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) * *(uint32_t *)(b
+ i
);
245 clear_high(d
, oprsz
, desc
);
248 void HELPER(gvec_mul64
)(void *d
, void *a
, void *b
, uint32_t desc
)
250 intptr_t oprsz
= simd_oprsz(desc
);
253 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
254 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) * *(uint64_t *)(b
+ i
);
256 clear_high(d
, oprsz
, desc
);
259 void HELPER(gvec_muls8
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
261 intptr_t oprsz
= simd_oprsz(desc
);
264 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
265 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) * (uint8_t)b
;
267 clear_high(d
, oprsz
, desc
);
270 void HELPER(gvec_muls16
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
272 intptr_t oprsz
= simd_oprsz(desc
);
275 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
276 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) * (uint16_t)b
;
278 clear_high(d
, oprsz
, desc
);
281 void HELPER(gvec_muls32
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
283 intptr_t oprsz
= simd_oprsz(desc
);
286 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
287 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) * (uint32_t)b
;
289 clear_high(d
, oprsz
, desc
);
292 void HELPER(gvec_muls64
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
294 intptr_t oprsz
= simd_oprsz(desc
);
297 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
298 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) * b
;
300 clear_high(d
, oprsz
, desc
);
303 void HELPER(gvec_neg8
)(void *d
, void *a
, uint32_t desc
)
305 intptr_t oprsz
= simd_oprsz(desc
);
308 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
309 *(uint8_t *)(d
+ i
) = -*(uint8_t *)(a
+ i
);
311 clear_high(d
, oprsz
, desc
);
314 void HELPER(gvec_neg16
)(void *d
, void *a
, uint32_t desc
)
316 intptr_t oprsz
= simd_oprsz(desc
);
319 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
320 *(uint16_t *)(d
+ i
) = -*(uint16_t *)(a
+ i
);
322 clear_high(d
, oprsz
, desc
);
325 void HELPER(gvec_neg32
)(void *d
, void *a
, uint32_t desc
)
327 intptr_t oprsz
= simd_oprsz(desc
);
330 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
331 *(uint32_t *)(d
+ i
) = -*(uint32_t *)(a
+ i
);
333 clear_high(d
, oprsz
, desc
);
336 void HELPER(gvec_neg64
)(void *d
, void *a
, uint32_t desc
)
338 intptr_t oprsz
= simd_oprsz(desc
);
341 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
342 *(uint64_t *)(d
+ i
) = -*(uint64_t *)(a
+ i
);
344 clear_high(d
, oprsz
, desc
);
347 void HELPER(gvec_abs8
)(void *d
, void *a
, uint32_t desc
)
349 intptr_t oprsz
= simd_oprsz(desc
);
352 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
353 int8_t aa
= *(int8_t *)(a
+ i
);
354 *(int8_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
356 clear_high(d
, oprsz
, desc
);
359 void HELPER(gvec_abs16
)(void *d
, void *a
, uint32_t desc
)
361 intptr_t oprsz
= simd_oprsz(desc
);
364 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
365 int16_t aa
= *(int16_t *)(a
+ i
);
366 *(int16_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
368 clear_high(d
, oprsz
, desc
);
371 void HELPER(gvec_abs32
)(void *d
, void *a
, uint32_t desc
)
373 intptr_t oprsz
= simd_oprsz(desc
);
376 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
377 int32_t aa
= *(int32_t *)(a
+ i
);
378 *(int32_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
380 clear_high(d
, oprsz
, desc
);
383 void HELPER(gvec_abs64
)(void *d
, void *a
, uint32_t desc
)
385 intptr_t oprsz
= simd_oprsz(desc
);
388 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
389 int64_t aa
= *(int64_t *)(a
+ i
);
390 *(int64_t *)(d
+ i
) = aa
< 0 ? -aa
: aa
;
392 clear_high(d
, oprsz
, desc
);
/*
 * gvec_mov: reads the operation size from desc and calls clear_high() on d.
 * NOTE(review): the embedded line numbers skip 398-399 and 'a' is never
 * referenced in the visible text, so a statement is missing from this
 * view -- presumably the copy from a to d; confirm against the full file.
 */
395 void HELPER(gvec_mov
)(void *d
, void *a
, uint32_t desc
)
397 intptr_t oprsz
= simd_oprsz(desc
);
400 clear_high(d
, oprsz
, desc
);
/*
 * gvec_dup64: stores the 64-bit value c into every uint64_t lane of d
 * below simd_oprsz(desc), then calls clear_high().
 * NOTE(review): the embedded line numbers skip 406-410 and 413-414, so
 * some control flow around the loop is not visible here; confirm against
 * the full file before relying on this description.
 */
403 void HELPER(gvec_dup64
)(void *d
, uint32_t desc
, uint64_t c
)
405 intptr_t oprsz
= simd_oprsz(desc
);
411 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
412 *(uint64_t *)(d
+ i
) = c
;
415 clear_high(d
, oprsz
, desc
);
/*
 * gvec_dup32: stores the 32-bit value c into every uint32_t lane of d
 * below simd_oprsz(desc), then calls clear_high().
 * NOTE(review): the embedded line numbers skip 421-425 and 428-429, so
 * some control flow around the loop is not visible here; confirm against
 * the full file before relying on this description.
 */
418 void HELPER(gvec_dup32
)(void *d
, uint32_t desc
, uint32_t c
)
420 intptr_t oprsz
= simd_oprsz(desc
);
426 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
427 *(uint32_t *)(d
+ i
) = c
;
430 clear_high(d
, oprsz
, desc
);
433 void HELPER(gvec_dup16
)(void *d
, uint32_t desc
, uint32_t c
)
435 HELPER(gvec_dup32
)(d
, desc
, 0x00010001 * (c
& 0xffff));
438 void HELPER(gvec_dup8
)(void *d
, uint32_t desc
, uint32_t c
)
440 HELPER(gvec_dup32
)(d
, desc
, 0x01010101 * (c
& 0xff));
443 void HELPER(gvec_not
)(void *d
, void *a
, uint32_t desc
)
445 intptr_t oprsz
= simd_oprsz(desc
);
448 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
449 *(uint64_t *)(d
+ i
) = ~*(uint64_t *)(a
+ i
);
451 clear_high(d
, oprsz
, desc
);
454 void HELPER(gvec_and
)(void *d
, void *a
, void *b
, uint32_t desc
)
456 intptr_t oprsz
= simd_oprsz(desc
);
459 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
460 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) & *(uint64_t *)(b
+ i
);
462 clear_high(d
, oprsz
, desc
);
465 void HELPER(gvec_or
)(void *d
, void *a
, void *b
, uint32_t desc
)
467 intptr_t oprsz
= simd_oprsz(desc
);
470 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
471 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) | *(uint64_t *)(b
+ i
);
473 clear_high(d
, oprsz
, desc
);
476 void HELPER(gvec_xor
)(void *d
, void *a
, void *b
, uint32_t desc
)
478 intptr_t oprsz
= simd_oprsz(desc
);
481 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
482 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) ^ *(uint64_t *)(b
+ i
);
484 clear_high(d
, oprsz
, desc
);
487 void HELPER(gvec_andc
)(void *d
, void *a
, void *b
, uint32_t desc
)
489 intptr_t oprsz
= simd_oprsz(desc
);
492 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
493 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) &~ *(uint64_t *)(b
+ i
);
495 clear_high(d
, oprsz
, desc
);
498 void HELPER(gvec_orc
)(void *d
, void *a
, void *b
, uint32_t desc
)
500 intptr_t oprsz
= simd_oprsz(desc
);
503 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
504 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) |~ *(uint64_t *)(b
+ i
);
506 clear_high(d
, oprsz
, desc
);
509 void HELPER(gvec_nand
)(void *d
, void *a
, void *b
, uint32_t desc
)
511 intptr_t oprsz
= simd_oprsz(desc
);
514 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
515 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) & *(uint64_t *)(b
+ i
));
517 clear_high(d
, oprsz
, desc
);
520 void HELPER(gvec_nor
)(void *d
, void *a
, void *b
, uint32_t desc
)
522 intptr_t oprsz
= simd_oprsz(desc
);
525 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
526 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) | *(uint64_t *)(b
+ i
));
528 clear_high(d
, oprsz
, desc
);
531 void HELPER(gvec_eqv
)(void *d
, void *a
, void *b
, uint32_t desc
)
533 intptr_t oprsz
= simd_oprsz(desc
);
536 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
537 *(uint64_t *)(d
+ i
) = ~(*(uint64_t *)(a
+ i
) ^ *(uint64_t *)(b
+ i
));
539 clear_high(d
, oprsz
, desc
);
542 void HELPER(gvec_ands
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
544 intptr_t oprsz
= simd_oprsz(desc
);
547 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
548 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) & b
;
550 clear_high(d
, oprsz
, desc
);
553 void HELPER(gvec_xors
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
555 intptr_t oprsz
= simd_oprsz(desc
);
558 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
559 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) ^ b
;
561 clear_high(d
, oprsz
, desc
);
564 void HELPER(gvec_ors
)(void *d
, void *a
, uint64_t b
, uint32_t desc
)
566 intptr_t oprsz
= simd_oprsz(desc
);
569 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
570 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) | b
;
572 clear_high(d
, oprsz
, desc
);
575 void HELPER(gvec_shl8i
)(void *d
, void *a
, uint32_t desc
)
577 intptr_t oprsz
= simd_oprsz(desc
);
578 int shift
= simd_data(desc
);
581 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
582 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) << shift
;
584 clear_high(d
, oprsz
, desc
);
587 void HELPER(gvec_shl16i
)(void *d
, void *a
, uint32_t desc
)
589 intptr_t oprsz
= simd_oprsz(desc
);
590 int shift
= simd_data(desc
);
593 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
594 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) << shift
;
596 clear_high(d
, oprsz
, desc
);
599 void HELPER(gvec_shl32i
)(void *d
, void *a
, uint32_t desc
)
601 intptr_t oprsz
= simd_oprsz(desc
);
602 int shift
= simd_data(desc
);
605 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
606 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) << shift
;
608 clear_high(d
, oprsz
, desc
);
611 void HELPER(gvec_shl64i
)(void *d
, void *a
, uint32_t desc
)
613 intptr_t oprsz
= simd_oprsz(desc
);
614 int shift
= simd_data(desc
);
617 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
618 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) << shift
;
620 clear_high(d
, oprsz
, desc
);
623 void HELPER(gvec_shr8i
)(void *d
, void *a
, uint32_t desc
)
625 intptr_t oprsz
= simd_oprsz(desc
);
626 int shift
= simd_data(desc
);
629 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
630 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) >> shift
;
632 clear_high(d
, oprsz
, desc
);
635 void HELPER(gvec_shr16i
)(void *d
, void *a
, uint32_t desc
)
637 intptr_t oprsz
= simd_oprsz(desc
);
638 int shift
= simd_data(desc
);
641 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
642 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) >> shift
;
644 clear_high(d
, oprsz
, desc
);
647 void HELPER(gvec_shr32i
)(void *d
, void *a
, uint32_t desc
)
649 intptr_t oprsz
= simd_oprsz(desc
);
650 int shift
= simd_data(desc
);
653 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
654 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) >> shift
;
656 clear_high(d
, oprsz
, desc
);
659 void HELPER(gvec_shr64i
)(void *d
, void *a
, uint32_t desc
)
661 intptr_t oprsz
= simd_oprsz(desc
);
662 int shift
= simd_data(desc
);
665 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
666 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) >> shift
;
668 clear_high(d
, oprsz
, desc
);
671 void HELPER(gvec_sar8i
)(void *d
, void *a
, uint32_t desc
)
673 intptr_t oprsz
= simd_oprsz(desc
);
674 int shift
= simd_data(desc
);
677 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
678 *(int8_t *)(d
+ i
) = *(int8_t *)(a
+ i
) >> shift
;
680 clear_high(d
, oprsz
, desc
);
683 void HELPER(gvec_sar16i
)(void *d
, void *a
, uint32_t desc
)
685 intptr_t oprsz
= simd_oprsz(desc
);
686 int shift
= simd_data(desc
);
689 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
690 *(int16_t *)(d
+ i
) = *(int16_t *)(a
+ i
) >> shift
;
692 clear_high(d
, oprsz
, desc
);
695 void HELPER(gvec_sar32i
)(void *d
, void *a
, uint32_t desc
)
697 intptr_t oprsz
= simd_oprsz(desc
);
698 int shift
= simd_data(desc
);
701 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
702 *(int32_t *)(d
+ i
) = *(int32_t *)(a
+ i
) >> shift
;
704 clear_high(d
, oprsz
, desc
);
707 void HELPER(gvec_sar64i
)(void *d
, void *a
, uint32_t desc
)
709 intptr_t oprsz
= simd_oprsz(desc
);
710 int shift
= simd_data(desc
);
713 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
714 *(int64_t *)(d
+ i
) = *(int64_t *)(a
+ i
) >> shift
;
716 clear_high(d
, oprsz
, desc
);
719 void HELPER(gvec_rotl8i
)(void *d
, void *a
, uint32_t desc
)
721 intptr_t oprsz
= simd_oprsz(desc
);
722 int shift
= simd_data(desc
);
725 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
726 *(uint8_t *)(d
+ i
) = rol8(*(uint8_t *)(a
+ i
), shift
);
728 clear_high(d
, oprsz
, desc
);
731 void HELPER(gvec_rotl16i
)(void *d
, void *a
, uint32_t desc
)
733 intptr_t oprsz
= simd_oprsz(desc
);
734 int shift
= simd_data(desc
);
737 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
738 *(uint16_t *)(d
+ i
) = rol16(*(uint16_t *)(a
+ i
), shift
);
740 clear_high(d
, oprsz
, desc
);
743 void HELPER(gvec_rotl32i
)(void *d
, void *a
, uint32_t desc
)
745 intptr_t oprsz
= simd_oprsz(desc
);
746 int shift
= simd_data(desc
);
749 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
750 *(uint32_t *)(d
+ i
) = rol32(*(uint32_t *)(a
+ i
), shift
);
752 clear_high(d
, oprsz
, desc
);
755 void HELPER(gvec_rotl64i
)(void *d
, void *a
, uint32_t desc
)
757 intptr_t oprsz
= simd_oprsz(desc
);
758 int shift
= simd_data(desc
);
761 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
762 *(uint64_t *)(d
+ i
) = rol64(*(uint64_t *)(a
+ i
), shift
);
764 clear_high(d
, oprsz
, desc
);
767 void HELPER(gvec_shl8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
769 intptr_t oprsz
= simd_oprsz(desc
);
772 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
773 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
774 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) << sh
;
776 clear_high(d
, oprsz
, desc
);
779 void HELPER(gvec_shl16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
781 intptr_t oprsz
= simd_oprsz(desc
);
784 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
785 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
786 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) << sh
;
788 clear_high(d
, oprsz
, desc
);
791 void HELPER(gvec_shl32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
793 intptr_t oprsz
= simd_oprsz(desc
);
796 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
797 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
798 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) << sh
;
800 clear_high(d
, oprsz
, desc
);
803 void HELPER(gvec_shl64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
805 intptr_t oprsz
= simd_oprsz(desc
);
808 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
809 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
810 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) << sh
;
812 clear_high(d
, oprsz
, desc
);
815 void HELPER(gvec_shr8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
817 intptr_t oprsz
= simd_oprsz(desc
);
820 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
821 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
822 *(uint8_t *)(d
+ i
) = *(uint8_t *)(a
+ i
) >> sh
;
824 clear_high(d
, oprsz
, desc
);
827 void HELPER(gvec_shr16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
829 intptr_t oprsz
= simd_oprsz(desc
);
832 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
833 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
834 *(uint16_t *)(d
+ i
) = *(uint16_t *)(a
+ i
) >> sh
;
836 clear_high(d
, oprsz
, desc
);
839 void HELPER(gvec_shr32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
841 intptr_t oprsz
= simd_oprsz(desc
);
844 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
845 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
846 *(uint32_t *)(d
+ i
) = *(uint32_t *)(a
+ i
) >> sh
;
848 clear_high(d
, oprsz
, desc
);
851 void HELPER(gvec_shr64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
853 intptr_t oprsz
= simd_oprsz(desc
);
856 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
857 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
858 *(uint64_t *)(d
+ i
) = *(uint64_t *)(a
+ i
) >> sh
;
860 clear_high(d
, oprsz
, desc
);
863 void HELPER(gvec_sar8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
865 intptr_t oprsz
= simd_oprsz(desc
);
868 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
869 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
870 *(int8_t *)(d
+ i
) = *(int8_t *)(a
+ i
) >> sh
;
872 clear_high(d
, oprsz
, desc
);
875 void HELPER(gvec_sar16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
877 intptr_t oprsz
= simd_oprsz(desc
);
880 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
881 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
882 *(int16_t *)(d
+ i
) = *(int16_t *)(a
+ i
) >> sh
;
884 clear_high(d
, oprsz
, desc
);
887 void HELPER(gvec_sar32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
889 intptr_t oprsz
= simd_oprsz(desc
);
892 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
893 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
894 *(int32_t *)(d
+ i
) = *(int32_t *)(a
+ i
) >> sh
;
896 clear_high(d
, oprsz
, desc
);
899 void HELPER(gvec_sar64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
901 intptr_t oprsz
= simd_oprsz(desc
);
904 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
905 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
906 *(int64_t *)(d
+ i
) = *(int64_t *)(a
+ i
) >> sh
;
908 clear_high(d
, oprsz
, desc
);
911 void HELPER(gvec_rotl8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
913 intptr_t oprsz
= simd_oprsz(desc
);
916 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
917 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
918 *(uint8_t *)(d
+ i
) = rol8(*(uint8_t *)(a
+ i
), sh
);
920 clear_high(d
, oprsz
, desc
);
923 void HELPER(gvec_rotl16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
925 intptr_t oprsz
= simd_oprsz(desc
);
928 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
929 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
930 *(uint16_t *)(d
+ i
) = rol16(*(uint16_t *)(a
+ i
), sh
);
932 clear_high(d
, oprsz
, desc
);
935 void HELPER(gvec_rotl32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
937 intptr_t oprsz
= simd_oprsz(desc
);
940 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
941 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
942 *(uint32_t *)(d
+ i
) = rol32(*(uint32_t *)(a
+ i
), sh
);
944 clear_high(d
, oprsz
, desc
);
947 void HELPER(gvec_rotl64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
949 intptr_t oprsz
= simd_oprsz(desc
);
952 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
953 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
954 *(uint64_t *)(d
+ i
) = rol64(*(uint64_t *)(a
+ i
), sh
);
956 clear_high(d
, oprsz
, desc
);
959 void HELPER(gvec_rotr8v
)(void *d
, void *a
, void *b
, uint32_t desc
)
961 intptr_t oprsz
= simd_oprsz(desc
);
964 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
965 uint8_t sh
= *(uint8_t *)(b
+ i
) & 7;
966 *(uint8_t *)(d
+ i
) = ror8(*(uint8_t *)(a
+ i
), sh
);
968 clear_high(d
, oprsz
, desc
);
971 void HELPER(gvec_rotr16v
)(void *d
, void *a
, void *b
, uint32_t desc
)
973 intptr_t oprsz
= simd_oprsz(desc
);
976 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
977 uint8_t sh
= *(uint16_t *)(b
+ i
) & 15;
978 *(uint16_t *)(d
+ i
) = ror16(*(uint16_t *)(a
+ i
), sh
);
980 clear_high(d
, oprsz
, desc
);
983 void HELPER(gvec_rotr32v
)(void *d
, void *a
, void *b
, uint32_t desc
)
985 intptr_t oprsz
= simd_oprsz(desc
);
988 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
989 uint8_t sh
= *(uint32_t *)(b
+ i
) & 31;
990 *(uint32_t *)(d
+ i
) = ror32(*(uint32_t *)(a
+ i
), sh
);
992 clear_high(d
, oprsz
, desc
);
995 void HELPER(gvec_rotr64v
)(void *d
, void *a
, void *b
, uint32_t desc
)
997 intptr_t oprsz
= simd_oprsz(desc
);
1000 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1001 uint8_t sh
= *(uint64_t *)(b
+ i
) & 63;
1002 *(uint64_t *)(d
+ i
) = ror64(*(uint64_t *)(a
+ i
), sh
);
1004 clear_high(d
, oprsz
, desc
);
/*
 * Define comparison helper NAME over TYPE lanes.  The C comparison
 * yields 0 or 1; negating it and storing to TYPE produces an all-zeros
 * or all-ones lane.
 */
#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    const intptr_t nbytes = simd_oprsz(desc);                              \
    intptr_t off = 0;                                                      \
    while (off < nbytes) {                                                 \
        const TYPE lhs = *(TYPE *)(a + off);                               \
        const TYPE rhs = *(TYPE *)(b + off);                               \
        *(TYPE *)(d + off) = -(lhs OP rhs);                                \
        off += sizeof(TYPE);                                               \
    }                                                                      \
    clear_high(d, nbytes, desc);                                           \
}
/*
 * Instantiate the six comparison helpers for SZ-bit lanes: eq/ne and the
 * "u" variants use uintSZ_t, lt/le use intSZ_t.
 */
#define DO_CMP2(SZ)                                 \
    DO_CMP1(gvec_eq##SZ,  uint##SZ##_t, ==)         \
    DO_CMP1(gvec_ne##SZ,  uint##SZ##_t, !=)         \
    DO_CMP1(gvec_lt##SZ,  int##SZ##_t,  <)          \
    DO_CMP1(gvec_le##SZ,  int##SZ##_t,  <=)         \
    DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)          \
    DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
/*
 * gvec_ssadd8: per int8_t lane, forms a[k] + b[k] in an int, tests it
 * against INT8_MIN in an else-branch, and stores the result as int8_t
 * before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1041-1042 and 1044-1045,
 * so the leading "if" and both branch bodies are not visible here --
 * presumably they clamp r to INT8_MAX / INT8_MIN; confirm against the
 * full file.
 */
1034 void HELPER(gvec_ssadd8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1036 intptr_t oprsz
= simd_oprsz(desc
);
1039 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1040 int r
= *(int8_t *)(a
+ i
) + *(int8_t *)(b
+ i
);
1043 } else if (r
< INT8_MIN
) {
1046 *(int8_t *)(d
+ i
) = r
;
1048 clear_high(d
, oprsz
, desc
);
/*
 * gvec_ssadd16: per int16_t lane, forms a[k] + b[k] in an int, compares
 * it with INT16_MAX and INT16_MIN, and stores the result as int16_t
 * before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1059 and 1061-1062, so the
 * bodies of both range tests are not visible here -- presumably they
 * clamp r to INT16_MAX / INT16_MIN; confirm against the full file.
 */
1051 void HELPER(gvec_ssadd16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1053 intptr_t oprsz
= simd_oprsz(desc
);
1056 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1057 int r
= *(int16_t *)(a
+ i
) + *(int16_t *)(b
+ i
);
1058 if (r
> INT16_MAX
) {
1060 } else if (r
< INT16_MIN
) {
1063 *(int16_t *)(d
+ i
) = r
;
1065 clear_high(d
, oprsz
, desc
);
1068 void HELPER(gvec_ssadd32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1070 intptr_t oprsz
= simd_oprsz(desc
);
1073 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1074 int32_t ai
= *(int32_t *)(a
+ i
);
1075 int32_t bi
= *(int32_t *)(b
+ i
);
1076 int32_t di
= ai
+ bi
;
1077 if (((di
^ ai
) &~ (ai
^ bi
)) < 0) {
1078 /* Signed overflow. */
1079 di
= (di
< 0 ? INT32_MAX
: INT32_MIN
);
1081 *(int32_t *)(d
+ i
) = di
;
1083 clear_high(d
, oprsz
, desc
);
1086 void HELPER(gvec_ssadd64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1088 intptr_t oprsz
= simd_oprsz(desc
);
1091 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1092 int64_t ai
= *(int64_t *)(a
+ i
);
1093 int64_t bi
= *(int64_t *)(b
+ i
);
1094 int64_t di
= ai
+ bi
;
1095 if (((di
^ ai
) &~ (ai
^ bi
)) < 0) {
1096 /* Signed overflow. */
1097 di
= (di
< 0 ? INT64_MAX
: INT64_MIN
);
1099 *(int64_t *)(d
+ i
) = di
;
1101 clear_high(d
, oprsz
, desc
);
/*
 * gvec_sssub8: per byte lane, forms (int8_t)a[k] - (int8_t)b[k] in an
 * int, tests it against INT8_MIN in an else-branch, and stores the low
 * 8 bits through a uint8_t pointer before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1111-1112 and 1114-1115,
 * so the leading "if" and both branch bodies are not visible here --
 * presumably they clamp r to INT8_MAX / INT8_MIN; confirm against the
 * full file.
 */
1104 void HELPER(gvec_sssub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1106 intptr_t oprsz
= simd_oprsz(desc
);
1109 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1110 int r
= *(int8_t *)(a
+ i
) - *(int8_t *)(b
+ i
);
1113 } else if (r
< INT8_MIN
) {
1116 *(uint8_t *)(d
+ i
) = r
;
1118 clear_high(d
, oprsz
, desc
);
/*
 * gvec_sssub16: per int16_t lane, forms a[k] - b[k] in an int, compares
 * it with INT16_MAX and INT16_MIN, and stores the result as int16_t
 * before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1129 and 1131-1132, so the
 * bodies of both range tests are not visible here -- presumably they
 * clamp r to INT16_MAX / INT16_MIN; confirm against the full file.
 */
1121 void HELPER(gvec_sssub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1123 intptr_t oprsz
= simd_oprsz(desc
);
1126 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1127 int r
= *(int16_t *)(a
+ i
) - *(int16_t *)(b
+ i
);
1128 if (r
> INT16_MAX
) {
1130 } else if (r
< INT16_MIN
) {
1133 *(int16_t *)(d
+ i
) = r
;
1135 clear_high(d
, oprsz
, desc
);
1138 void HELPER(gvec_sssub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1140 intptr_t oprsz
= simd_oprsz(desc
);
1143 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1144 int32_t ai
= *(int32_t *)(a
+ i
);
1145 int32_t bi
= *(int32_t *)(b
+ i
);
1146 int32_t di
= ai
- bi
;
1147 if (((di
^ ai
) & (ai
^ bi
)) < 0) {
1148 /* Signed overflow. */
1149 di
= (di
< 0 ? INT32_MAX
: INT32_MIN
);
1151 *(int32_t *)(d
+ i
) = di
;
1153 clear_high(d
, oprsz
, desc
);
1156 void HELPER(gvec_sssub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1158 intptr_t oprsz
= simd_oprsz(desc
);
1161 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1162 int64_t ai
= *(int64_t *)(a
+ i
);
1163 int64_t bi
= *(int64_t *)(b
+ i
);
1164 int64_t di
= ai
- bi
;
1165 if (((di
^ ai
) & (ai
^ bi
)) < 0) {
1166 /* Signed overflow. */
1167 di
= (di
< 0 ? INT64_MAX
: INT64_MIN
);
1169 *(int64_t *)(d
+ i
) = di
;
1171 clear_high(d
, oprsz
, desc
);
/*
 * gvec_usadd8: per uint8_t lane, forms a[k] + b[k] in an unsigned,
 * compares it with UINT8_MAX, and stores the result as uint8_t before
 * calling clear_high().
 * NOTE(review): the embedded line numbers skip 1182-1183, so the body of
 * the range test is not visible here -- presumably it clamps r to
 * UINT8_MAX; confirm against the full file.
 */
1174 void HELPER(gvec_usadd8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1176 intptr_t oprsz
= simd_oprsz(desc
);
1179 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1180 unsigned r
= *(uint8_t *)(a
+ i
) + *(uint8_t *)(b
+ i
);
1181 if (r
> UINT8_MAX
) {
1184 *(uint8_t *)(d
+ i
) = r
;
1186 clear_high(d
, oprsz
, desc
);
/*
 * gvec_usadd16: per uint16_t lane, forms a[k] + b[k] in an unsigned,
 * compares it with UINT16_MAX, and stores the result as uint16_t before
 * calling clear_high().
 * NOTE(review): the embedded line numbers skip 1197-1198, so the body of
 * the range test is not visible here -- presumably it clamps r to
 * UINT16_MAX; confirm against the full file.
 */
1189 void HELPER(gvec_usadd16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1191 intptr_t oprsz
= simd_oprsz(desc
);
1194 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1195 unsigned r
= *(uint16_t *)(a
+ i
) + *(uint16_t *)(b
+ i
);
1196 if (r
> UINT16_MAX
) {
1199 *(uint16_t *)(d
+ i
) = r
;
1201 clear_high(d
, oprsz
, desc
);
/*
 * gvec_usadd32: per uint32_t lane, computes the (wrapping) sum
 * di = a[k] + b[k] and stores di to d before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1213-1215, so whatever
 * happens to di between the add and the store is not visible here --
 * presumably a saturation check; confirm against the full file.
 */
1204 void HELPER(gvec_usadd32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1206 intptr_t oprsz
= simd_oprsz(desc
);
1209 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1210 uint32_t ai
= *(uint32_t *)(a
+ i
);
1211 uint32_t bi
= *(uint32_t *)(b
+ i
);
1212 uint32_t di
= ai
+ bi
;
1216 *(uint32_t *)(d
+ i
) = di
;
1218 clear_high(d
, oprsz
, desc
);
/*
 * gvec_usadd64: per uint64_t lane, computes the (wrapping) sum
 * di = a[k] + b[k] and stores di to d before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1230-1232, so whatever
 * happens to di between the add and the store is not visible here --
 * presumably a saturation check; confirm against the full file.
 */
1221 void HELPER(gvec_usadd64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1223 intptr_t oprsz
= simd_oprsz(desc
);
1226 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1227 uint64_t ai
= *(uint64_t *)(a
+ i
);
1228 uint64_t bi
= *(uint64_t *)(b
+ i
);
1229 uint64_t di
= ai
+ bi
;
1233 *(uint64_t *)(d
+ i
) = di
;
1235 clear_high(d
, oprsz
, desc
);
/*
 * gvec_ussub8: per uint8_t lane, forms a[k] - b[k] in an int and stores
 * the result as uint8_t before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1245-1247, so whatever
 * happens to r between the subtract and the store is not visible here --
 * presumably a clamp of negative results to 0; confirm against the full
 * file.
 */
1238 void HELPER(gvec_ussub8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1240 intptr_t oprsz
= simd_oprsz(desc
);
1243 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1244 int r
= *(uint8_t *)(a
+ i
) - *(uint8_t *)(b
+ i
);
1248 *(uint8_t *)(d
+ i
) = r
;
1250 clear_high(d
, oprsz
, desc
);
/*
 * gvec_ussub16: per uint16_t lane, forms a[k] - b[k] in an int and stores
 * the result as uint16_t before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1260-1262, so whatever
 * happens to r between the subtract and the store is not visible here --
 * presumably a clamp of negative results to 0; confirm against the full
 * file.
 */
1253 void HELPER(gvec_ussub16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1255 intptr_t oprsz
= simd_oprsz(desc
);
1258 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1259 int r
= *(uint16_t *)(a
+ i
) - *(uint16_t *)(b
+ i
);
1263 *(uint16_t *)(d
+ i
) = r
;
1265 clear_high(d
, oprsz
, desc
);
/*
 * gvec_ussub32: per uint32_t lane, computes the (wrapping) difference
 * di = a[k] - b[k] and stores di to d before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1277-1279, so whatever
 * happens to di between the subtract and the store is not visible here --
 * presumably a saturation check; confirm against the full file.
 */
1268 void HELPER(gvec_ussub32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1270 intptr_t oprsz
= simd_oprsz(desc
);
1273 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1274 uint32_t ai
= *(uint32_t *)(a
+ i
);
1275 uint32_t bi
= *(uint32_t *)(b
+ i
);
1276 uint32_t di
= ai
- bi
;
1280 *(uint32_t *)(d
+ i
) = di
;
1282 clear_high(d
, oprsz
, desc
);
/*
 * gvec_ussub64: per uint64_t lane, computes the (wrapping) difference
 * di = a[k] - b[k] and stores di to d before calling clear_high().
 * NOTE(review): the embedded line numbers skip 1294-1296, so whatever
 * happens to di between the subtract and the store is not visible here --
 * presumably a saturation check; confirm against the full file.
 */
1285 void HELPER(gvec_ussub64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1287 intptr_t oprsz
= simd_oprsz(desc
);
1290 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1291 uint64_t ai
= *(uint64_t *)(a
+ i
);
1292 uint64_t bi
= *(uint64_t *)(b
+ i
);
1293 uint64_t di
= ai
- bi
;
1297 *(uint64_t *)(d
+ i
) = di
;
1299 clear_high(d
, oprsz
, desc
);
1302 void HELPER(gvec_smin8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1304 intptr_t oprsz
= simd_oprsz(desc
);
1307 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1308 int8_t aa
= *(int8_t *)(a
+ i
);
1309 int8_t bb
= *(int8_t *)(b
+ i
);
1310 int8_t dd
= aa
< bb
? aa
: bb
;
1311 *(int8_t *)(d
+ i
) = dd
;
1313 clear_high(d
, oprsz
, desc
);
1316 void HELPER(gvec_smin16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1318 intptr_t oprsz
= simd_oprsz(desc
);
1321 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1322 int16_t aa
= *(int16_t *)(a
+ i
);
1323 int16_t bb
= *(int16_t *)(b
+ i
);
1324 int16_t dd
= aa
< bb
? aa
: bb
;
1325 *(int16_t *)(d
+ i
) = dd
;
1327 clear_high(d
, oprsz
, desc
);
1330 void HELPER(gvec_smin32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1332 intptr_t oprsz
= simd_oprsz(desc
);
1335 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1336 int32_t aa
= *(int32_t *)(a
+ i
);
1337 int32_t bb
= *(int32_t *)(b
+ i
);
1338 int32_t dd
= aa
< bb
? aa
: bb
;
1339 *(int32_t *)(d
+ i
) = dd
;
1341 clear_high(d
, oprsz
, desc
);
1344 void HELPER(gvec_smin64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1346 intptr_t oprsz
= simd_oprsz(desc
);
1349 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1350 int64_t aa
= *(int64_t *)(a
+ i
);
1351 int64_t bb
= *(int64_t *)(b
+ i
);
1352 int64_t dd
= aa
< bb
? aa
: bb
;
1353 *(int64_t *)(d
+ i
) = dd
;
1355 clear_high(d
, oprsz
, desc
);
1358 void HELPER(gvec_smax8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1360 intptr_t oprsz
= simd_oprsz(desc
);
1363 for (i
= 0; i
< oprsz
; i
+= sizeof(int8_t)) {
1364 int8_t aa
= *(int8_t *)(a
+ i
);
1365 int8_t bb
= *(int8_t *)(b
+ i
);
1366 int8_t dd
= aa
> bb
? aa
: bb
;
1367 *(int8_t *)(d
+ i
) = dd
;
1369 clear_high(d
, oprsz
, desc
);
1372 void HELPER(gvec_smax16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1374 intptr_t oprsz
= simd_oprsz(desc
);
1377 for (i
= 0; i
< oprsz
; i
+= sizeof(int16_t)) {
1378 int16_t aa
= *(int16_t *)(a
+ i
);
1379 int16_t bb
= *(int16_t *)(b
+ i
);
1380 int16_t dd
= aa
> bb
? aa
: bb
;
1381 *(int16_t *)(d
+ i
) = dd
;
1383 clear_high(d
, oprsz
, desc
);
1386 void HELPER(gvec_smax32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1388 intptr_t oprsz
= simd_oprsz(desc
);
1391 for (i
= 0; i
< oprsz
; i
+= sizeof(int32_t)) {
1392 int32_t aa
= *(int32_t *)(a
+ i
);
1393 int32_t bb
= *(int32_t *)(b
+ i
);
1394 int32_t dd
= aa
> bb
? aa
: bb
;
1395 *(int32_t *)(d
+ i
) = dd
;
1397 clear_high(d
, oprsz
, desc
);
1400 void HELPER(gvec_smax64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1402 intptr_t oprsz
= simd_oprsz(desc
);
1405 for (i
= 0; i
< oprsz
; i
+= sizeof(int64_t)) {
1406 int64_t aa
= *(int64_t *)(a
+ i
);
1407 int64_t bb
= *(int64_t *)(b
+ i
);
1408 int64_t dd
= aa
> bb
? aa
: bb
;
1409 *(int64_t *)(d
+ i
) = dd
;
1411 clear_high(d
, oprsz
, desc
);
1414 void HELPER(gvec_umin8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1416 intptr_t oprsz
= simd_oprsz(desc
);
1419 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1420 uint8_t aa
= *(uint8_t *)(a
+ i
);
1421 uint8_t bb
= *(uint8_t *)(b
+ i
);
1422 uint8_t dd
= aa
< bb
? aa
: bb
;
1423 *(uint8_t *)(d
+ i
) = dd
;
1425 clear_high(d
, oprsz
, desc
);
1428 void HELPER(gvec_umin16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1430 intptr_t oprsz
= simd_oprsz(desc
);
1433 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1434 uint16_t aa
= *(uint16_t *)(a
+ i
);
1435 uint16_t bb
= *(uint16_t *)(b
+ i
);
1436 uint16_t dd
= aa
< bb
? aa
: bb
;
1437 *(uint16_t *)(d
+ i
) = dd
;
1439 clear_high(d
, oprsz
, desc
);
1442 void HELPER(gvec_umin32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1444 intptr_t oprsz
= simd_oprsz(desc
);
1447 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1448 uint32_t aa
= *(uint32_t *)(a
+ i
);
1449 uint32_t bb
= *(uint32_t *)(b
+ i
);
1450 uint32_t dd
= aa
< bb
? aa
: bb
;
1451 *(uint32_t *)(d
+ i
) = dd
;
1453 clear_high(d
, oprsz
, desc
);
1456 void HELPER(gvec_umin64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1458 intptr_t oprsz
= simd_oprsz(desc
);
1461 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1462 uint64_t aa
= *(uint64_t *)(a
+ i
);
1463 uint64_t bb
= *(uint64_t *)(b
+ i
);
1464 uint64_t dd
= aa
< bb
? aa
: bb
;
1465 *(uint64_t *)(d
+ i
) = dd
;
1467 clear_high(d
, oprsz
, desc
);
1470 void HELPER(gvec_umax8
)(void *d
, void *a
, void *b
, uint32_t desc
)
1472 intptr_t oprsz
= simd_oprsz(desc
);
1475 for (i
= 0; i
< oprsz
; i
+= sizeof(uint8_t)) {
1476 uint8_t aa
= *(uint8_t *)(a
+ i
);
1477 uint8_t bb
= *(uint8_t *)(b
+ i
);
1478 uint8_t dd
= aa
> bb
? aa
: bb
;
1479 *(uint8_t *)(d
+ i
) = dd
;
1481 clear_high(d
, oprsz
, desc
);
1484 void HELPER(gvec_umax16
)(void *d
, void *a
, void *b
, uint32_t desc
)
1486 intptr_t oprsz
= simd_oprsz(desc
);
1489 for (i
= 0; i
< oprsz
; i
+= sizeof(uint16_t)) {
1490 uint16_t aa
= *(uint16_t *)(a
+ i
);
1491 uint16_t bb
= *(uint16_t *)(b
+ i
);
1492 uint16_t dd
= aa
> bb
? aa
: bb
;
1493 *(uint16_t *)(d
+ i
) = dd
;
1495 clear_high(d
, oprsz
, desc
);
1498 void HELPER(gvec_umax32
)(void *d
, void *a
, void *b
, uint32_t desc
)
1500 intptr_t oprsz
= simd_oprsz(desc
);
1503 for (i
= 0; i
< oprsz
; i
+= sizeof(uint32_t)) {
1504 uint32_t aa
= *(uint32_t *)(a
+ i
);
1505 uint32_t bb
= *(uint32_t *)(b
+ i
);
1506 uint32_t dd
= aa
> bb
? aa
: bb
;
1507 *(uint32_t *)(d
+ i
) = dd
;
1509 clear_high(d
, oprsz
, desc
);
1512 void HELPER(gvec_umax64
)(void *d
, void *a
, void *b
, uint32_t desc
)
1514 intptr_t oprsz
= simd_oprsz(desc
);
1517 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1518 uint64_t aa
= *(uint64_t *)(a
+ i
);
1519 uint64_t bb
= *(uint64_t *)(b
+ i
);
1520 uint64_t dd
= aa
> bb
? aa
: bb
;
1521 *(uint64_t *)(d
+ i
) = dd
;
1523 clear_high(d
, oprsz
, desc
);
1526 void HELPER(gvec_bitsel
)(void *d
, void *a
, void *b
, void *c
, uint32_t desc
)
1528 intptr_t oprsz
= simd_oprsz(desc
);
1531 for (i
= 0; i
< oprsz
; i
+= sizeof(uint64_t)) {
1532 uint64_t aa
= *(uint64_t *)(a
+ i
);
1533 uint64_t bb
= *(uint64_t *)(b
+ i
);
1534 uint64_t cc
= *(uint64_t *)(c
+ i
);
1535 *(uint64_t *)(d
+ i
) = (bb
& aa
) | (cc
& ~aa
);
1537 clear_high(d
, oprsz
, desc
);