/* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.

   Copyright (C) 2008-2018 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _GCC_LOONGSON_H
#define _GCC_LOONGSON_H

#if !defined(__mips_loongson_vector_rev)
# error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

/* Vectors of unsigned bytes, halfwords and words.  */
typedef uint8_t uint8x8_t __attribute__((vector_size (8)));
typedef uint16_t uint16x4_t __attribute__((vector_size (8)));
typedef uint32_t uint32x2_t __attribute__((vector_size (8)));

/* Vectors of signed bytes, halfwords and words.  */
typedef int8_t int8x8_t __attribute__((vector_size (8)));
typedef int16_t int16x4_t __attribute__((vector_size (8)));
typedef int32_t int32x2_t __attribute__((vector_size (8)));

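/* Illustrative sketch, not part of the original header: these typedefs
   are ordinary GCC generic vectors, so they can be built with brace
   initializers, subscripted element-wise (GCC 4.6 or later), and
   combined with the usual arithmetic and bitwise operators.  The
   helper below is hypothetical and exists only to demonstrate that.  */
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
example_sum_elements (void)
{
  int16x4_t v = { 1, 2, 3, 4 };      /* element 0 is 1, element 3 is 4 */
  return v[0] + v[1] + v[2] + v[3];  /* evaluates to 10 */
}
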
/* SIMD intrinsics.
   Unless otherwise noted, calls to the functions below will expand into
   precisely one machine instruction, modulo any moves required to
   satisfy register allocation constraints.  */

/* Pack with signed saturation.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
packsswh (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_packsswh (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
packsshb (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_packsshb (s, t);
}

/* Pack with unsigned saturation.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
packushb (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_packushb (s, t);
}

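/* Illustrative sketch, not part of the original header: packsswh
   narrows two int32x2_t vectors into one int16x4_t, clamping each
   element to the int16_t range [-32768, 32767].  Which source feeds
   the low or high half of the result is determined by the underlying
   PACKSSWH instruction; this example relies only on the saturation.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
example_narrow_with_saturation (int32x2_t a, int32x2_t b)
{
  /* An input element of 100000 becomes 32767; -100000 becomes -32768.  */
  return packsswh (a, b);
}
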
/* Vector addition, treating overflow by wraparound.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
paddw_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_paddw_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
paddh_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_paddh_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
paddb_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_paddb_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
paddw_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_paddw_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
paddh_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_paddh_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
paddb_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_paddb_s (s, t);
}

/* Addition of doubleword integers, treating overflow by wraparound.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  return __builtin_loongson_paddd_u (s, t);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  return __builtin_loongson_paddd_s (s, t);
}

/* Vector addition, treating overflow by signed saturation.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
paddsh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_paddsh (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
paddsb (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_paddsb (s, t);
}

/* Vector addition, treating overflow by unsigned saturation.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
paddush (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_paddush (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
paddusb (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_paddusb (s, t);
}

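/* Illustrative sketch, not part of the original header: contrast of
   wraparound and saturating byte addition.  With both inputs holding
   200 in a lane, paddb_u wraps that lane to 144 (400 mod 256), while
   paddusb clamps it to 255.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
example_add_without_wraparound (uint8x8_t s, uint8x8_t t)
{
  return paddusb (s, t);  /* saturates at 255 instead of wrapping */
}
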
/* Logical AND NOT.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  return __builtin_loongson_pandn_ud (s, t);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
pandn_uw (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_pandn_uw (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pandn_uh (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pandn_uh (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pandn_ub (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pandn_ub (s, t);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  return __builtin_loongson_pandn_sd (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
pandn_sw (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_pandn_sw (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pandn_sh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pandn_sh (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
pandn_sb (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_pandn_sb (s, t);
}

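/* Illustrative sketch, not part of the original header: assuming the
   conventional AND-NOT operand order (first operand complemented, as
   in the MMX PANDN instruction), pandn_ub (mask, x) keeps the bits of
   x wherever mask has zeros and clears the rest.  Verify the operand
   order against the instruction description before relying on it.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
example_and_not (uint8x8_t mask, uint8x8_t x)
{
  return pandn_ub (mask, x);  /* ~mask & x, under the stated assumption */
}
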
/* Average.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pavgh (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pavgh (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pavgb (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pavgb (s, t);
}

/* Equality test.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
pcmpeqw_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_pcmpeqw_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pcmpeqh_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pcmpeqh_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pcmpeqb_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pcmpeqb_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
pcmpeqw_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_pcmpeqw_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pcmpeqh_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pcmpeqh_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
pcmpeqb_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_pcmpeqb_s (s, t);
}

/* Greater-than test.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
pcmpgtw_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_pcmpgtw_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pcmpgth_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pcmpgth_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pcmpgtb_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pcmpgtb_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
pcmpgtw_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_pcmpgtw_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pcmpgth_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pcmpgth_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
pcmpgtb_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_pcmpgtb_s (s, t);
}

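/* Illustrative sketch, not part of the original header: the comparison
   intrinsics yield a per-element mask, all-ones where the test holds
   and all-zeros elsewhere, which composes with GCC's generic vector
   bitwise operators.  Assuming the first operand is the left-hand side
   of the test (as with the MMX counterparts), the hypothetical helper
   below selects the larger halfword from each lane; pmaxsh later in
   this header does the same thing in a single instruction.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
example_select_greater (int16x4_t s, int16x4_t t)
{
  int16x4_t mask = pcmpgth_s (s, t);  /* all-ones where s > t (assumed) */
  return (s & mask) | (t & ~mask);
}
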
/* Extract halfword.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pextrh_u (uint16x4_t s, int field /* 0--3 */)
{
  return __builtin_loongson_pextrh_u (s, field);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pextrh_s (int16x4_t s, int field /* 0--3 */)
{
  return __builtin_loongson_pextrh_s (s, field);
}

/* Insert halfword.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pinsrh_0_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pinsrh_0_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pinsrh_1_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pinsrh_1_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pinsrh_2_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pinsrh_2_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pinsrh_3_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pinsrh_3_u (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pinsrh_0_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pinsrh_0_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pinsrh_1_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pinsrh_1_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pinsrh_2_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pinsrh_2_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pinsrh_3_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pinsrh_3_s (s, t);
}

/* Multiply and add.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
pmaddhw (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pmaddhw (s, t);
}

/* Maximum of signed halfwords.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pmaxsh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pmaxsh (s, t);
}

/* Maximum of unsigned bytes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pmaxub (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pmaxub (s, t);
}

/* Minimum of signed halfwords.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pminsh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pminsh (s, t);
}

/* Minimum of unsigned bytes.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pminub (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pminub (s, t);
}

/* Move byte mask.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pmovmskb_u (uint8x8_t s)
{
  return __builtin_loongson_pmovmskb_u (s);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
pmovmskb_s (int8x8_t s)
{
  return __builtin_loongson_pmovmskb_s (s);
}

/* Multiply unsigned integers and store high result.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pmulhuh (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_pmulhuh (s, t);
}

/* Multiply signed integers and store high result.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pmulhh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pmulhh (s, t);
}

/* Multiply signed integers and store low result.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pmullh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_pmullh (s, t);
}

/* Multiply unsigned word integers.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pmuluw (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_pmuluw (s, t);
}

/* Absolute difference.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
pasubub (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_pasubub (s, t);
}

/* Sum of unsigned byte integers.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
biadd (uint8x8_t s)
{
  return __builtin_loongson_biadd (s);
}

/* Sum of absolute differences.
   Note that this intrinsic expands into two machine instructions:
   PASUBUB followed by BIADD.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psadbh (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_psadbh (s, t);
}

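/* Illustrative sketch, not part of the original header: since psadbh
   is documented above as expanding to PASUBUB followed by BIADD, the
   composition of those two single-instruction intrinsics should give
   the same result.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
example_sad_expanded (uint8x8_t s, uint8x8_t t)
{
  return biadd (pasubub (s, t));  /* absolute differences, then sum */
}
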
/* Shuffle halfwords.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
{
  return __builtin_loongson_pshufh_u (s, order);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
{
  return __builtin_loongson_pshufh_s (s, order);
}

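/* Illustrative sketch, not part of the original header: the order byte
   is assumed here to follow the PSHUFW convention, two bits per result
   element selecting a source element, so 0x1b would reverse the four
   halfwords.  Note also that the dest argument is accepted for the
   sake of the prototype but is not passed to the builtin above.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
example_reverse_halfwords (uint16x4_t s)
{
  return pshufh_u (s, s, 0x1b);
}
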
/* Shift left logical.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psllh_u (uint16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psllh_u (s, amount);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
psllh_s (int16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psllh_s (s, amount);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
psllw_u (uint32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psllw_u (s, amount);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
psllw_s (int32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psllw_s (s, amount);
}

/* Shift right logical.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psrlh_u (uint16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psrlh_u (s, amount);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
psrlh_s (int16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psrlh_s (s, amount);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
psrlw_u (uint32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psrlw_u (s, amount);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
psrlw_s (int32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psrlw_s (s, amount);
}

/* Shift right arithmetic.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psrah_u (uint16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psrah_u (s, amount);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
psrah_s (int16x4_t s, uint8_t amount)
{
  return __builtin_loongson_psrah_s (s, amount);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
psraw_u (uint32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psraw_u (s, amount);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
psraw_s (int32x2_t s, uint8_t amount)
{
  return __builtin_loongson_psraw_s (s, amount);
}

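/* Illustrative sketch, not part of the original header: logical and
   arithmetic right shifts differ only for negative elements; the
   arithmetic form copies the sign bit while the logical form shifts
   in zeros.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
example_halve_signed (int16x4_t s)
{
  /* A lane holding -6 becomes -3 here; psrlh_s (s, 1) would instead
     give 0x7ffd (32765) for the same lane.  */
  return psrah_s (s, 1);
}
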
/* Vector subtraction, treating overflow by wraparound.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
psubw_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_psubw_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psubh_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_psubh_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
psubb_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_psubb_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
psubw_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_psubw_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
psubh_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_psubh_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
psubb_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_psubb_s (s, t);
}

/* Subtraction of doubleword integers, treating overflow by wraparound.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  return __builtin_loongson_psubd_u (s, t);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  return __builtin_loongson_psubd_s (s, t);
}

/* Vector subtraction, treating overflow by signed saturation.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
psubsh (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_psubsh (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
psubsb (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_psubsb (s, t);
}

/* Vector subtraction, treating overflow by unsigned saturation.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
psubush (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_psubush (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
psubusb (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_psubusb (s, t);
}

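/* Illustrative sketch, not part of the original header: unsigned
   saturating subtraction clamps at zero, giving a branch-free
   per-byte "positive difference", i.e. max (s - t, 0) in each lane.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
example_clamped_difference (uint8x8_t s, uint8x8_t t)
{
  return psubusb (s, t);  /* 5 - 9 yields 0 rather than wrapping */
}
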
/* Unpack high data.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
punpckhwd_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_punpckhwd_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
punpckhhw_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_punpckhhw_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
punpckhbh_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_punpckhbh_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
punpckhwd_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_punpckhwd_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
punpckhhw_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_punpckhhw_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
punpckhbh_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_punpckhbh_s (s, t);
}

/* Unpack low data.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
punpcklwd_u (uint32x2_t s, uint32x2_t t)
{
  return __builtin_loongson_punpcklwd_u (s, t);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
punpcklhw_u (uint16x4_t s, uint16x4_t t)
{
  return __builtin_loongson_punpcklhw_u (s, t);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
punpcklbh_u (uint8x8_t s, uint8x8_t t)
{
  return __builtin_loongson_punpcklbh_u (s, t);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
punpcklwd_s (int32x2_t s, int32x2_t t)
{
  return __builtin_loongson_punpcklwd_s (s, t);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
punpcklhw_s (int16x4_t s, int16x4_t t)
{
  return __builtin_loongson_punpcklhw_s (s, t);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
punpcklbh_s (int8x8_t s, int8x8_t t)
{
  return __builtin_loongson_punpcklbh_s (s, t);
}

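/* Illustrative sketch, not part of the original header: a common use
   of the unpack operations is widening, by interleaving one source
   with a zero vector and reinterpreting the result.  Whether the data
   bytes end up in the low or high half of each halfword depends on the
   operand order of the underlying PUNPCKLBH instruction and on byte
   order, so treat this hypothetical helper as an assumption to verify
   rather than a specification.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
example_widen_low_bytes (uint8x8_t s)
{
  uint8x8_t zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
  return (uint16x4_t) punpcklbh_u (s, zero);
}
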
#ifdef __cplusplus
}
#endif

#endif