/* [AARCH64][ACLE][NEON] Implement vcvt*_s64_f64 and vcvt*_u64_f64 NEON
   intrinsics.
   File: gcc/config/aarch64/arm_neon.h  */
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #pragma GCC push_options
31 #pragma GCC target ("+nothing+simd")
33 #include <stdint.h>
35 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
36 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
38 typedef __Int8x8_t int8x8_t;
39 typedef __Int16x4_t int16x4_t;
40 typedef __Int32x2_t int32x2_t;
41 typedef __Int64x1_t int64x1_t;
42 typedef __Float16x4_t float16x4_t;
43 typedef __Float32x2_t float32x2_t;
44 typedef __Poly8x8_t poly8x8_t;
45 typedef __Poly16x4_t poly16x4_t;
46 typedef __Uint8x8_t uint8x8_t;
47 typedef __Uint16x4_t uint16x4_t;
48 typedef __Uint32x2_t uint32x2_t;
49 typedef __Float64x1_t float64x1_t;
50 typedef __Uint64x1_t uint64x1_t;
51 typedef __Int8x16_t int8x16_t;
52 typedef __Int16x8_t int16x8_t;
53 typedef __Int32x4_t int32x4_t;
54 typedef __Int64x2_t int64x2_t;
55 typedef __Float16x8_t float16x8_t;
56 typedef __Float32x4_t float32x4_t;
57 typedef __Float64x2_t float64x2_t;
58 typedef __Poly8x16_t poly8x16_t;
59 typedef __Poly16x8_t poly16x8_t;
60 typedef __Poly64x2_t poly64x2_t;
61 typedef __Uint8x16_t uint8x16_t;
62 typedef __Uint16x8_t uint16x8_t;
63 typedef __Uint32x4_t uint32x4_t;
64 typedef __Uint64x2_t uint64x2_t;
66 typedef __Poly8_t poly8_t;
67 typedef __Poly16_t poly16_t;
68 typedef __Poly64_t poly64_t;
69 typedef __Poly128_t poly128_t;
71 typedef __fp16 float16_t;
72 typedef float float32_t;
73 typedef double float64_t;
75 typedef struct int8x8x2_t
77 int8x8_t val[2];
78 } int8x8x2_t;
80 typedef struct int8x16x2_t
82 int8x16_t val[2];
83 } int8x16x2_t;
85 typedef struct int16x4x2_t
87 int16x4_t val[2];
88 } int16x4x2_t;
90 typedef struct int16x8x2_t
92 int16x8_t val[2];
93 } int16x8x2_t;
95 typedef struct int32x2x2_t
97 int32x2_t val[2];
98 } int32x2x2_t;
100 typedef struct int32x4x2_t
102 int32x4_t val[2];
103 } int32x4x2_t;
105 typedef struct int64x1x2_t
107 int64x1_t val[2];
108 } int64x1x2_t;
110 typedef struct int64x2x2_t
112 int64x2_t val[2];
113 } int64x2x2_t;
115 typedef struct uint8x8x2_t
117 uint8x8_t val[2];
118 } uint8x8x2_t;
120 typedef struct uint8x16x2_t
122 uint8x16_t val[2];
123 } uint8x16x2_t;
125 typedef struct uint16x4x2_t
127 uint16x4_t val[2];
128 } uint16x4x2_t;
130 typedef struct uint16x8x2_t
132 uint16x8_t val[2];
133 } uint16x8x2_t;
135 typedef struct uint32x2x2_t
137 uint32x2_t val[2];
138 } uint32x2x2_t;
140 typedef struct uint32x4x2_t
142 uint32x4_t val[2];
143 } uint32x4x2_t;
145 typedef struct uint64x1x2_t
147 uint64x1_t val[2];
148 } uint64x1x2_t;
150 typedef struct uint64x2x2_t
152 uint64x2_t val[2];
153 } uint64x2x2_t;
155 typedef struct float16x4x2_t
157 float16x4_t val[2];
158 } float16x4x2_t;
160 typedef struct float16x8x2_t
162 float16x8_t val[2];
163 } float16x8x2_t;
165 typedef struct float32x2x2_t
167 float32x2_t val[2];
168 } float32x2x2_t;
170 typedef struct float32x4x2_t
172 float32x4_t val[2];
173 } float32x4x2_t;
175 typedef struct float64x2x2_t
177 float64x2_t val[2];
178 } float64x2x2_t;
180 typedef struct float64x1x2_t
182 float64x1_t val[2];
183 } float64x1x2_t;
185 typedef struct poly8x8x2_t
187 poly8x8_t val[2];
188 } poly8x8x2_t;
190 typedef struct poly8x16x2_t
192 poly8x16_t val[2];
193 } poly8x16x2_t;
195 typedef struct poly16x4x2_t
197 poly16x4_t val[2];
198 } poly16x4x2_t;
200 typedef struct poly16x8x2_t
202 poly16x8_t val[2];
203 } poly16x8x2_t;
205 typedef struct int8x8x3_t
207 int8x8_t val[3];
208 } int8x8x3_t;
210 typedef struct int8x16x3_t
212 int8x16_t val[3];
213 } int8x16x3_t;
215 typedef struct int16x4x3_t
217 int16x4_t val[3];
218 } int16x4x3_t;
220 typedef struct int16x8x3_t
222 int16x8_t val[3];
223 } int16x8x3_t;
225 typedef struct int32x2x3_t
227 int32x2_t val[3];
228 } int32x2x3_t;
230 typedef struct int32x4x3_t
232 int32x4_t val[3];
233 } int32x4x3_t;
235 typedef struct int64x1x3_t
237 int64x1_t val[3];
238 } int64x1x3_t;
240 typedef struct int64x2x3_t
242 int64x2_t val[3];
243 } int64x2x3_t;
245 typedef struct uint8x8x3_t
247 uint8x8_t val[3];
248 } uint8x8x3_t;
250 typedef struct uint8x16x3_t
252 uint8x16_t val[3];
253 } uint8x16x3_t;
255 typedef struct uint16x4x3_t
257 uint16x4_t val[3];
258 } uint16x4x3_t;
260 typedef struct uint16x8x3_t
262 uint16x8_t val[3];
263 } uint16x8x3_t;
265 typedef struct uint32x2x3_t
267 uint32x2_t val[3];
268 } uint32x2x3_t;
270 typedef struct uint32x4x3_t
272 uint32x4_t val[3];
273 } uint32x4x3_t;
275 typedef struct uint64x1x3_t
277 uint64x1_t val[3];
278 } uint64x1x3_t;
280 typedef struct uint64x2x3_t
282 uint64x2_t val[3];
283 } uint64x2x3_t;
285 typedef struct float16x4x3_t
287 float16x4_t val[3];
288 } float16x4x3_t;
290 typedef struct float16x8x3_t
292 float16x8_t val[3];
293 } float16x8x3_t;
295 typedef struct float32x2x3_t
297 float32x2_t val[3];
298 } float32x2x3_t;
300 typedef struct float32x4x3_t
302 float32x4_t val[3];
303 } float32x4x3_t;
305 typedef struct float64x2x3_t
307 float64x2_t val[3];
308 } float64x2x3_t;
310 typedef struct float64x1x3_t
312 float64x1_t val[3];
313 } float64x1x3_t;
315 typedef struct poly8x8x3_t
317 poly8x8_t val[3];
318 } poly8x8x3_t;
320 typedef struct poly8x16x3_t
322 poly8x16_t val[3];
323 } poly8x16x3_t;
325 typedef struct poly16x4x3_t
327 poly16x4_t val[3];
328 } poly16x4x3_t;
330 typedef struct poly16x8x3_t
332 poly16x8_t val[3];
333 } poly16x8x3_t;
335 typedef struct int8x8x4_t
337 int8x8_t val[4];
338 } int8x8x4_t;
340 typedef struct int8x16x4_t
342 int8x16_t val[4];
343 } int8x16x4_t;
345 typedef struct int16x4x4_t
347 int16x4_t val[4];
348 } int16x4x4_t;
350 typedef struct int16x8x4_t
352 int16x8_t val[4];
353 } int16x8x4_t;
355 typedef struct int32x2x4_t
357 int32x2_t val[4];
358 } int32x2x4_t;
360 typedef struct int32x4x4_t
362 int32x4_t val[4];
363 } int32x4x4_t;
365 typedef struct int64x1x4_t
367 int64x1_t val[4];
368 } int64x1x4_t;
370 typedef struct int64x2x4_t
372 int64x2_t val[4];
373 } int64x2x4_t;
375 typedef struct uint8x8x4_t
377 uint8x8_t val[4];
378 } uint8x8x4_t;
380 typedef struct uint8x16x4_t
382 uint8x16_t val[4];
383 } uint8x16x4_t;
385 typedef struct uint16x4x4_t
387 uint16x4_t val[4];
388 } uint16x4x4_t;
390 typedef struct uint16x8x4_t
392 uint16x8_t val[4];
393 } uint16x8x4_t;
395 typedef struct uint32x2x4_t
397 uint32x2_t val[4];
398 } uint32x2x4_t;
400 typedef struct uint32x4x4_t
402 uint32x4_t val[4];
403 } uint32x4x4_t;
405 typedef struct uint64x1x4_t
407 uint64x1_t val[4];
408 } uint64x1x4_t;
410 typedef struct uint64x2x4_t
412 uint64x2_t val[4];
413 } uint64x2x4_t;
415 typedef struct float16x4x4_t
417 float16x4_t val[4];
418 } float16x4x4_t;
420 typedef struct float16x8x4_t
422 float16x8_t val[4];
423 } float16x8x4_t;
425 typedef struct float32x2x4_t
427 float32x2_t val[4];
428 } float32x2x4_t;
430 typedef struct float32x4x4_t
432 float32x4_t val[4];
433 } float32x4x4_t;
435 typedef struct float64x2x4_t
437 float64x2_t val[4];
438 } float64x2x4_t;
440 typedef struct float64x1x4_t
442 float64x1_t val[4];
443 } float64x1x4_t;
445 typedef struct poly8x8x4_t
447 poly8x8_t val[4];
448 } poly8x8x4_t;
450 typedef struct poly8x16x4_t
452 poly8x16_t val[4];
453 } poly8x16x4_t;
455 typedef struct poly16x4x4_t
457 poly16x4_t val[4];
458 } poly16x4x4_t;
460 typedef struct poly16x8x4_t
462 poly16x8_t val[4];
463 } poly16x8x4_t;
/* __aarch64_vdup_lane internal macros.  The worker macro extracts lane
   __b of vector __a and broadcasts it with the appropriate vdup[q]_n
   intrinsic; __q selects the 64-bit ("") or 128-bit ("q") result form,
   while the source vector's own width is implied by __a's type.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdup_laneq internal macros: 128-bit source, 64-bit result.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdupq_lane internal macros: 64-bit source, 128-bit result.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)

/* __aarch64_vdupq_laneq internal macros: 128-bit source and result.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)
/* Internal macro for lane indices.  */

#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
/* Emit a compile-time diagnostic when __idx is outside the vector's
   lane range.  */
#define __AARCH64_LANE_CHECK(__vec, __idx) \
  __builtin_aarch64_im_lane_boundsi (sizeof(__vec), sizeof(__vec[0]), __idx)

/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif

/* vget_lane internal macro.  Bounds-checks __index, then reads the
   endian-corrected lane via a GNU statement expression.  */
#define __aarch64_vget_lane_any(__vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)]; \
  })

/* vset_lane and vld1_lane internal macro.  Writes __elem into the
   endian-corrected lane and yields the updated vector.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)] = __elem; \
    __vec; \
  })
603 /* vadd */
604 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
605 vadd_s8 (int8x8_t __a, int8x8_t __b)
607 return __a + __b;
610 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
611 vadd_s16 (int16x4_t __a, int16x4_t __b)
613 return __a + __b;
616 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
617 vadd_s32 (int32x2_t __a, int32x2_t __b)
619 return __a + __b;
622 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
623 vadd_f32 (float32x2_t __a, float32x2_t __b)
625 return __a + __b;
628 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
629 vadd_f64 (float64x1_t __a, float64x1_t __b)
631 return __a + __b;
634 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
635 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
637 return __a + __b;
640 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
641 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
643 return __a + __b;
646 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
647 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
649 return __a + __b;
652 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
653 vadd_s64 (int64x1_t __a, int64x1_t __b)
655 return __a + __b;
658 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
659 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
661 return __a + __b;
664 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
665 vaddq_s8 (int8x16_t __a, int8x16_t __b)
667 return __a + __b;
670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
671 vaddq_s16 (int16x8_t __a, int16x8_t __b)
673 return __a + __b;
676 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
677 vaddq_s32 (int32x4_t __a, int32x4_t __b)
679 return __a + __b;
682 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
683 vaddq_s64 (int64x2_t __a, int64x2_t __b)
685 return __a + __b;
688 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
689 vaddq_f32 (float32x4_t __a, float32x4_t __b)
691 return __a + __b;
694 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
695 vaddq_f64 (float64x2_t __a, float64x2_t __b)
697 return __a + __b;
700 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
701 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
703 return __a + __b;
706 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
707 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
709 return __a + __b;
712 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
713 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
715 return __a + __b;
718 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
719 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
721 return __a + __b;
724 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
725 vaddl_s8 (int8x8_t __a, int8x8_t __b)
727 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
730 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
731 vaddl_s16 (int16x4_t __a, int16x4_t __b)
733 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
736 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
737 vaddl_s32 (int32x2_t __a, int32x2_t __b)
739 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
742 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
743 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
745 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
746 (int8x8_t) __b);
749 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
750 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
752 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
753 (int16x4_t) __b);
756 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
757 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
759 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
760 (int32x2_t) __b);
763 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
764 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
766 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
769 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
770 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
772 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
775 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
776 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
778 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
781 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
782 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
784 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
785 (int8x16_t) __b);
788 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
789 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
791 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
792 (int16x8_t) __b);
795 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
796 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
798 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
799 (int32x4_t) __b);
802 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
803 vaddw_s8 (int16x8_t __a, int8x8_t __b)
805 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
808 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
809 vaddw_s16 (int32x4_t __a, int16x4_t __b)
811 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
814 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
815 vaddw_s32 (int64x2_t __a, int32x2_t __b)
817 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
820 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
821 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
823 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
824 (int8x8_t) __b);
827 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
828 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
830 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
831 (int16x4_t) __b);
834 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
835 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
837 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
838 (int32x2_t) __b);
841 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
842 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
844 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
847 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
848 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
850 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
853 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
854 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
856 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
859 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
860 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
862 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
863 (int8x16_t) __b);
866 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
867 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
869 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
870 (int16x8_t) __b);
873 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
874 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
876 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
877 (int32x4_t) __b);
880 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
881 vhadd_s8 (int8x8_t __a, int8x8_t __b)
883 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
886 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
887 vhadd_s16 (int16x4_t __a, int16x4_t __b)
889 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
892 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
893 vhadd_s32 (int32x2_t __a, int32x2_t __b)
895 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
899 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
901 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
902 (int8x8_t) __b);
905 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
906 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
908 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
909 (int16x4_t) __b);
912 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
913 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
915 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
916 (int32x2_t) __b);
919 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
920 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
922 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
925 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
926 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
928 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
931 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
932 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
934 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
937 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
938 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
940 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
941 (int8x16_t) __b);
944 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
945 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
947 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
948 (int16x8_t) __b);
951 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
952 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
954 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
955 (int32x4_t) __b);
958 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
959 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
961 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
964 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
965 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
967 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
970 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
971 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
973 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
977 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
979 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
980 (int8x8_t) __b);
983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
984 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
986 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
987 (int16x4_t) __b);
990 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
991 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
993 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
994 (int32x2_t) __b);
997 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
998 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1000 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1003 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1004 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1006 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1009 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1010 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1012 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1015 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1016 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1018 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1019 (int8x16_t) __b);
1022 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1023 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1025 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1026 (int16x8_t) __b);
1029 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1030 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1032 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1033 (int32x4_t) __b);
1036 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1037 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1039 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1042 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1043 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1045 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1048 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1049 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1051 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1054 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1055 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1057 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1058 (int16x8_t) __b);
1061 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1062 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1064 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1065 (int32x4_t) __b);
1068 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1069 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1071 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1072 (int64x2_t) __b);
1075 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1076 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1078 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1081 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1082 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1084 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1087 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1088 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1090 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1093 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1094 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1096 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1097 (int16x8_t) __b);
1100 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1101 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1103 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1104 (int32x4_t) __b);
1107 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1108 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1110 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1111 (int64x2_t) __b);
1114 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1115 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1117 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1120 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1121 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1123 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1126 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1127 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1129 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1132 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1133 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1135 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1136 (int16x8_t) __b,
1137 (int16x8_t) __c);
1140 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1141 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1143 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1144 (int32x4_t) __b,
1145 (int32x4_t) __c);
1148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1149 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1151 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1152 (int64x2_t) __b,
1153 (int64x2_t) __c);
1156 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1157 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1159 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1162 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1163 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1165 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1168 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1169 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1171 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1174 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1175 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1177 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1178 (int16x8_t) __b,
1179 (int16x8_t) __c);
1182 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1183 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1185 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1186 (int32x4_t) __b,
1187 (int32x4_t) __c);
1190 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1191 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1193 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1194 (int64x2_t) __b,
1195 (int64x2_t) __c);
1198 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1199 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1201 return __a / __b;
1204 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1205 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1207 return __a / __b;
1210 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1211 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1213 return __a / __b;
1216 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1217 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1219 return __a / __b;
1222 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1223 vmul_s8 (int8x8_t __a, int8x8_t __b)
1225 return __a * __b;
1228 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1229 vmul_s16 (int16x4_t __a, int16x4_t __b)
1231 return __a * __b;
1234 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1235 vmul_s32 (int32x2_t __a, int32x2_t __b)
1237 return __a * __b;
1240 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1241 vmul_f32 (float32x2_t __a, float32x2_t __b)
1243 return __a * __b;
1246 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1247 vmul_f64 (float64x1_t __a, float64x1_t __b)
1249 return __a * __b;
1252 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1253 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1255 return __a * __b;
1258 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1259 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1261 return __a * __b;
1264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1265 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1267 return __a * __b;
1270 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1271 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1273 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1274 (int8x8_t) __b);
1277 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1278 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1280 return __a * __b;
1283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1284 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1286 return __a * __b;
1289 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1290 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1292 return __a * __b;
1295 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1296 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1298 return __a * __b;
1301 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1302 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1304 return __a * __b;
1307 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1308 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1310 return __a * __b;
1313 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1314 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1316 return __a * __b;
1319 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1320 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1322 return __a * __b;
1325 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1326 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1328 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1329 (int8x16_t) __b);
1332 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1333 vand_s8 (int8x8_t __a, int8x8_t __b)
1335 return __a & __b;
1338 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1339 vand_s16 (int16x4_t __a, int16x4_t __b)
1341 return __a & __b;
1344 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1345 vand_s32 (int32x2_t __a, int32x2_t __b)
1347 return __a & __b;
1350 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1351 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1353 return __a & __b;
1356 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1357 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1359 return __a & __b;
1362 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1363 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1365 return __a & __b;
1368 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1369 vand_s64 (int64x1_t __a, int64x1_t __b)
1371 return __a & __b;
1374 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1375 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1377 return __a & __b;
1380 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1381 vandq_s8 (int8x16_t __a, int8x16_t __b)
1383 return __a & __b;
1386 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1387 vandq_s16 (int16x8_t __a, int16x8_t __b)
1389 return __a & __b;
1392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1393 vandq_s32 (int32x4_t __a, int32x4_t __b)
1395 return __a & __b;
1398 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1399 vandq_s64 (int64x2_t __a, int64x2_t __b)
1401 return __a & __b;
1404 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1405 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1407 return __a & __b;
1410 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1411 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1413 return __a & __b;
1416 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1417 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1419 return __a & __b;
1422 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1423 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1425 return __a & __b;
1428 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1429 vorr_s8 (int8x8_t __a, int8x8_t __b)
1431 return __a | __b;
1434 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1435 vorr_s16 (int16x4_t __a, int16x4_t __b)
1437 return __a | __b;
1440 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1441 vorr_s32 (int32x2_t __a, int32x2_t __b)
1443 return __a | __b;
1446 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1447 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1449 return __a | __b;
1452 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1453 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1455 return __a | __b;
1458 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1459 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1461 return __a | __b;
1464 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1465 vorr_s64 (int64x1_t __a, int64x1_t __b)
1467 return __a | __b;
1470 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1471 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1473 return __a | __b;
1476 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1477 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1479 return __a | __b;
1482 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1483 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1485 return __a | __b;
1488 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1489 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1491 return __a | __b;
1494 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1495 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1497 return __a | __b;
1500 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1501 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1503 return __a | __b;
1506 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1507 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1509 return __a | __b;
1512 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1513 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1515 return __a | __b;
1518 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1519 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1521 return __a | __b;
1524 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1525 veor_s8 (int8x8_t __a, int8x8_t __b)
1527 return __a ^ __b;
1530 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1531 veor_s16 (int16x4_t __a, int16x4_t __b)
1533 return __a ^ __b;
1536 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1537 veor_s32 (int32x2_t __a, int32x2_t __b)
1539 return __a ^ __b;
1542 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1543 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1545 return __a ^ __b;
1548 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1549 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1551 return __a ^ __b;
1554 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1555 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1557 return __a ^ __b;
1560 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1561 veor_s64 (int64x1_t __a, int64x1_t __b)
1563 return __a ^ __b;
1566 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1567 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1569 return __a ^ __b;
1572 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1573 veorq_s8 (int8x16_t __a, int8x16_t __b)
1575 return __a ^ __b;
1578 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1579 veorq_s16 (int16x8_t __a, int16x8_t __b)
1581 return __a ^ __b;
1584 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1585 veorq_s32 (int32x4_t __a, int32x4_t __b)
1587 return __a ^ __b;
1590 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1591 veorq_s64 (int64x2_t __a, int64x2_t __b)
1593 return __a ^ __b;
1596 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1597 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1599 return __a ^ __b;
1602 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1603 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1605 return __a ^ __b;
1608 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1609 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1611 return __a ^ __b;
1614 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1615 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1617 return __a ^ __b;
1620 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1621 vbic_s8 (int8x8_t __a, int8x8_t __b)
1623 return __a & ~__b;
1626 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1627 vbic_s16 (int16x4_t __a, int16x4_t __b)
1629 return __a & ~__b;
1632 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1633 vbic_s32 (int32x2_t __a, int32x2_t __b)
1635 return __a & ~__b;
1638 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1639 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1641 return __a & ~__b;
1644 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1645 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1647 return __a & ~__b;
1650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1651 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1653 return __a & ~__b;
1656 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1657 vbic_s64 (int64x1_t __a, int64x1_t __b)
1659 return __a & ~__b;
1662 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1663 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1665 return __a & ~__b;
1668 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1669 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1671 return __a & ~__b;
1674 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1675 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1677 return __a & ~__b;
1680 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1681 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1683 return __a & ~__b;
1686 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1687 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1689 return __a & ~__b;
1692 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1693 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1695 return __a & ~__b;
1698 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1699 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1701 return __a & ~__b;
1704 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1705 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1707 return __a & ~__b;
1710 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1711 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1713 return __a & ~__b;
1716 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1717 vorn_s8 (int8x8_t __a, int8x8_t __b)
1719 return __a | ~__b;
1722 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1723 vorn_s16 (int16x4_t __a, int16x4_t __b)
1725 return __a | ~__b;
1728 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1729 vorn_s32 (int32x2_t __a, int32x2_t __b)
1731 return __a | ~__b;
1734 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1735 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1737 return __a | ~__b;
1740 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1741 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1743 return __a | ~__b;
1746 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1747 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1749 return __a | ~__b;
1752 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1753 vorn_s64 (int64x1_t __a, int64x1_t __b)
1755 return __a | ~__b;
1758 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1759 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1761 return __a | ~__b;
1764 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1765 vornq_s8 (int8x16_t __a, int8x16_t __b)
1767 return __a | ~__b;
1770 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1771 vornq_s16 (int16x8_t __a, int16x8_t __b)
1773 return __a | ~__b;
1776 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1777 vornq_s32 (int32x4_t __a, int32x4_t __b)
1779 return __a | ~__b;
1782 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1783 vornq_s64 (int64x2_t __a, int64x2_t __b)
1785 return __a | ~__b;
1788 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1789 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1791 return __a | ~__b;
1794 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1795 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1797 return __a | ~__b;
1800 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1801 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1803 return __a | ~__b;
1806 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1807 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1809 return __a | ~__b;
1812 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1813 vsub_s8 (int8x8_t __a, int8x8_t __b)
1815 return __a - __b;
1818 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1819 vsub_s16 (int16x4_t __a, int16x4_t __b)
1821 return __a - __b;
1824 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1825 vsub_s32 (int32x2_t __a, int32x2_t __b)
1827 return __a - __b;
1830 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1831 vsub_f32 (float32x2_t __a, float32x2_t __b)
1833 return __a - __b;
1836 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1837 vsub_f64 (float64x1_t __a, float64x1_t __b)
1839 return __a - __b;
1842 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1843 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1845 return __a - __b;
1848 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1849 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1851 return __a - __b;
1854 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1855 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1857 return __a - __b;
1860 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1861 vsub_s64 (int64x1_t __a, int64x1_t __b)
1863 return __a - __b;
1866 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1867 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1869 return __a - __b;
1872 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1873 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1875 return __a - __b;
1878 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1879 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1881 return __a - __b;
1884 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1885 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1887 return __a - __b;
1890 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1891 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1893 return __a - __b;
1896 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1897 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1899 return __a - __b;
1902 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1903 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1905 return __a - __b;
1908 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1909 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1911 return __a - __b;
1914 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1915 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1917 return __a - __b;
1920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1921 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1923 return __a - __b;
1926 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1927 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1929 return __a - __b;
1932 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1933 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1935 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1938 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1939 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1941 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1944 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1945 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1947 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1950 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1951 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1953 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1954 (int8x8_t) __b);
1957 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1958 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1960 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1961 (int16x4_t) __b);
1964 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1965 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1967 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1968 (int32x2_t) __b);
1971 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1972 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1974 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1977 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1978 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1980 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1983 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1984 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1986 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1989 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1990 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1992 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1993 (int8x16_t) __b);
1996 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1997 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1999 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2000 (int16x8_t) __b);
2003 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2004 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2006 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2007 (int32x4_t) __b);
2010 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2011 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2013 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2016 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2017 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2019 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2022 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2023 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2025 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2028 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2029 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2031 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2032 (int8x8_t) __b);
2035 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2036 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2038 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2039 (int16x4_t) __b);
2042 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2043 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2045 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2046 (int32x2_t) __b);
2049 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2050 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2052 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2055 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2056 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2058 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2061 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2062 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2064 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2067 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2068 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2070 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2071 (int8x16_t) __b);
2074 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2075 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2077 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2078 (int16x8_t) __b);
2081 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2082 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2084 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2085 (int32x4_t) __b);
2088 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2089 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2091 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2094 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2095 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2097 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2100 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2101 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2103 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2106 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2107 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2109 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2112 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2113 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2115 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2118 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2119 vhsub_s8 (int8x8_t __a, int8x8_t __b)
2121 return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b);
2124 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2125 vhsub_s16 (int16x4_t __a, int16x4_t __b)
2127 return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
2130 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2131 vhsub_s32 (int32x2_t __a, int32x2_t __b)
2133 return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
2136 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2137 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
2139 return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
2140 (int8x8_t) __b);
2143 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2144 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
2146 return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
2147 (int16x4_t) __b);
2150 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2151 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
2153 return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
2154 (int32x2_t) __b);
2157 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2158 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
2160 return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
2163 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2164 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
2166 return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
2169 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2170 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
2172 return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
2175 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2176 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2178 return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
2179 (int8x16_t) __b);
2182 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2183 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2185 return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
2186 (int16x8_t) __b);
2189 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2190 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2192 return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
2193 (int32x4_t) __b);
2196 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2197 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
2199 return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
2202 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2203 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
2205 return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
2208 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2209 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
2211 return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
2214 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2215 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2217 return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
2218 (int16x8_t) __b);
2221 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2222 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2224 return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
2225 (int32x4_t) __b);
2228 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2229 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2231 return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
2232 (int64x2_t) __b);
2235 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2236 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
2238 return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
2241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2242 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
2244 return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
2247 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2248 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
2250 return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
2253 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2254 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2256 return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
2257 (int16x8_t) __b);
2260 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2261 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2263 return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
2264 (int32x4_t) __b);
2267 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2268 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2270 return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
2271 (int64x2_t) __b);
2274 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2275 vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2277 return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
2280 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2281 vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2283 return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
2286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2287 vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2289 return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
2292 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2293 vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2295 return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
2296 (int16x8_t) __b,
2297 (int16x8_t) __c);
2300 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2301 vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2303 return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
2304 (int32x4_t) __b,
2305 (int32x4_t) __c);
2308 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2309 vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2311 return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
2312 (int64x2_t) __b,
2313 (int64x2_t) __c);
2316 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2317 vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2319 return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
2322 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2323 vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2325 return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);;
2328 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2329 vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2331 return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
2334 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2335 vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2337 return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
2338 (int16x8_t) __b,
2339 (int16x8_t) __c);
2342 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2343 vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2345 return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
2346 (int32x4_t) __b,
2347 (int32x4_t) __c);
2350 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2351 vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2353 return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
2354 (int64x2_t) __b,
2355 (int64x2_t) __c);
2358 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2359 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2361 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2364 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2365 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2367 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2370 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2371 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2373 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2376 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2377 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2379 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2382 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2383 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2385 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2389 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2391 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2394 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2395 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2397 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2400 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2401 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2403 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2406 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2407 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2409 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2412 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2413 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2415 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2418 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2419 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2421 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2424 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2425 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2427 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2430 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2431 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2433 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2436 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2437 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2439 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2442 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2443 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2445 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2448 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2449 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2451 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2454 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2455 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2457 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2460 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2461 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2463 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2466 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2467 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2469 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2472 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2473 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2475 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2479 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2481 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2484 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2485 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2487 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2490 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2491 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2493 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2497 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2499 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2502 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2503 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2505 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2508 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2509 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2511 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2514 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2515 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2517 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2520 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2521 vqneg_s8 (int8x8_t __a)
2523 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2526 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2527 vqneg_s16 (int16x4_t __a)
2529 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2532 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2533 vqneg_s32 (int32x2_t __a)
2535 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2538 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2539 vqneg_s64 (int64x1_t __a)
2541 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2544 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2545 vqnegq_s8 (int8x16_t __a)
2547 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2550 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2551 vqnegq_s16 (int16x8_t __a)
2553 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2556 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2557 vqnegq_s32 (int32x4_t __a)
2559 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2562 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2563 vqabs_s8 (int8x8_t __a)
2565 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2568 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2569 vqabs_s16 (int16x4_t __a)
2571 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2574 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2575 vqabs_s32 (int32x2_t __a)
2577 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2580 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2581 vqabs_s64 (int64x1_t __a)
2583 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2586 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2587 vqabsq_s8 (int8x16_t __a)
2589 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2592 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2593 vqabsq_s16 (int16x8_t __a)
2595 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2598 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2599 vqabsq_s32 (int32x4_t __a)
2601 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2604 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2605 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2607 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2610 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2611 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2613 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2616 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2617 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2619 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2622 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2623 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2625 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2629 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2631 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2634 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2635 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2637 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2640 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2641 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2643 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2646 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2647 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2649 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2652 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2653 vcreate_s8 (uint64_t __a)
2655 return (int8x8_t) __a;
2658 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2659 vcreate_s16 (uint64_t __a)
2661 return (int16x4_t) __a;
2664 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2665 vcreate_s32 (uint64_t __a)
2667 return (int32x2_t) __a;
2670 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2671 vcreate_s64 (uint64_t __a)
2673 return (int64x1_t) {__a};
2676 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
2677 vcreate_f16 (uint64_t __a)
2679 return (float16x4_t) __a;
2682 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2683 vcreate_f32 (uint64_t __a)
2685 return (float32x2_t) __a;
2688 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2689 vcreate_u8 (uint64_t __a)
2691 return (uint8x8_t) __a;
2694 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2695 vcreate_u16 (uint64_t __a)
2697 return (uint16x4_t) __a;
2700 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2701 vcreate_u32 (uint64_t __a)
2703 return (uint32x2_t) __a;
2706 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2707 vcreate_u64 (uint64_t __a)
2709 return (uint64x1_t) {__a};
2712 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2713 vcreate_f64 (uint64_t __a)
2715 return (float64x1_t) __a;
2718 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2719 vcreate_p8 (uint64_t __a)
2721 return (poly8x8_t) __a;
2724 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2725 vcreate_p16 (uint64_t __a)
2727 return (poly16x4_t) __a;
2730 /* vget_lane */
2732 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
2733 vget_lane_f16 (float16x4_t __a, const int __b)
2735 return __aarch64_vget_lane_any (__a, __b);
2738 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2739 vget_lane_f32 (float32x2_t __a, const int __b)
2741 return __aarch64_vget_lane_any (__a, __b);
2744 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2745 vget_lane_f64 (float64x1_t __a, const int __b)
2747 return __aarch64_vget_lane_any (__a, __b);
2750 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2751 vget_lane_p8 (poly8x8_t __a, const int __b)
2753 return __aarch64_vget_lane_any (__a, __b);
2756 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2757 vget_lane_p16 (poly16x4_t __a, const int __b)
2759 return __aarch64_vget_lane_any (__a, __b);
2762 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2763 vget_lane_s8 (int8x8_t __a, const int __b)
2765 return __aarch64_vget_lane_any (__a, __b);
2768 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2769 vget_lane_s16 (int16x4_t __a, const int __b)
2771 return __aarch64_vget_lane_any (__a, __b);
2774 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2775 vget_lane_s32 (int32x2_t __a, const int __b)
2777 return __aarch64_vget_lane_any (__a, __b);
2780 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2781 vget_lane_s64 (int64x1_t __a, const int __b)
2783 return __aarch64_vget_lane_any (__a, __b);
2786 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2787 vget_lane_u8 (uint8x8_t __a, const int __b)
2789 return __aarch64_vget_lane_any (__a, __b);
2792 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2793 vget_lane_u16 (uint16x4_t __a, const int __b)
2795 return __aarch64_vget_lane_any (__a, __b);
2798 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2799 vget_lane_u32 (uint32x2_t __a, const int __b)
2801 return __aarch64_vget_lane_any (__a, __b);
2804 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2805 vget_lane_u64 (uint64x1_t __a, const int __b)
2807 return __aarch64_vget_lane_any (__a, __b);
2810 /* vgetq_lane */
2812 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
2813 vgetq_lane_f16 (float16x8_t __a, const int __b)
2815 return __aarch64_vget_lane_any (__a, __b);
2818 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2819 vgetq_lane_f32 (float32x4_t __a, const int __b)
2821 return __aarch64_vget_lane_any (__a, __b);
2824 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2825 vgetq_lane_f64 (float64x2_t __a, const int __b)
2827 return __aarch64_vget_lane_any (__a, __b);
2830 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2831 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2833 return __aarch64_vget_lane_any (__a, __b);
2836 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2837 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2839 return __aarch64_vget_lane_any (__a, __b);
2842 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2843 vgetq_lane_s8 (int8x16_t __a, const int __b)
2845 return __aarch64_vget_lane_any (__a, __b);
2848 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2849 vgetq_lane_s16 (int16x8_t __a, const int __b)
2851 return __aarch64_vget_lane_any (__a, __b);
2854 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2855 vgetq_lane_s32 (int32x4_t __a, const int __b)
2857 return __aarch64_vget_lane_any (__a, __b);
2860 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2861 vgetq_lane_s64 (int64x2_t __a, const int __b)
2863 return __aarch64_vget_lane_any (__a, __b);
2866 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2867 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2869 return __aarch64_vget_lane_any (__a, __b);
2872 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2873 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2875 return __aarch64_vget_lane_any (__a, __b);
2878 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2879 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2881 return __aarch64_vget_lane_any (__a, __b);
2884 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2885 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2887 return __aarch64_vget_lane_any (__a, __b);
2890 /* vreinterpret */
2892 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2893 vreinterpret_p8_f16 (float16x4_t __a)
2895 return (poly8x8_t) __a;
2898 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2899 vreinterpret_p8_f64 (float64x1_t __a)
2901 return (poly8x8_t) __a;
2904 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2905 vreinterpret_p8_s8 (int8x8_t __a)
2907 return (poly8x8_t) __a;
2910 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2911 vreinterpret_p8_s16 (int16x4_t __a)
2913 return (poly8x8_t) __a;
2916 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2917 vreinterpret_p8_s32 (int32x2_t __a)
2919 return (poly8x8_t) __a;
2922 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2923 vreinterpret_p8_s64 (int64x1_t __a)
2925 return (poly8x8_t) __a;
2928 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2929 vreinterpret_p8_f32 (float32x2_t __a)
2931 return (poly8x8_t) __a;
2934 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2935 vreinterpret_p8_u8 (uint8x8_t __a)
2937 return (poly8x8_t) __a;
2940 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2941 vreinterpret_p8_u16 (uint16x4_t __a)
2943 return (poly8x8_t) __a;
2946 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2947 vreinterpret_p8_u32 (uint32x2_t __a)
2949 return (poly8x8_t) __a;
2952 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2953 vreinterpret_p8_u64 (uint64x1_t __a)
2955 return (poly8x8_t) __a;
2958 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2959 vreinterpret_p8_p16 (poly16x4_t __a)
2961 return (poly8x8_t) __a;
2964 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2965 vreinterpretq_p8_f64 (float64x2_t __a)
2967 return (poly8x16_t) __a;
2970 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2971 vreinterpretq_p8_s8 (int8x16_t __a)
2973 return (poly8x16_t) __a;
2976 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2977 vreinterpretq_p8_s16 (int16x8_t __a)
2979 return (poly8x16_t) __a;
2982 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2983 vreinterpretq_p8_s32 (int32x4_t __a)
2985 return (poly8x16_t) __a;
2988 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2989 vreinterpretq_p8_s64 (int64x2_t __a)
2991 return (poly8x16_t) __a;
2994 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2995 vreinterpretq_p8_f16 (float16x8_t __a)
2997 return (poly8x16_t) __a;
3000 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3001 vreinterpretq_p8_f32 (float32x4_t __a)
3003 return (poly8x16_t) __a;
3006 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3007 vreinterpretq_p8_u8 (uint8x16_t __a)
3009 return (poly8x16_t) __a;
3012 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3013 vreinterpretq_p8_u16 (uint16x8_t __a)
3015 return (poly8x16_t) __a;
3018 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3019 vreinterpretq_p8_u32 (uint32x4_t __a)
3021 return (poly8x16_t) __a;
3024 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3025 vreinterpretq_p8_u64 (uint64x2_t __a)
3027 return (poly8x16_t) __a;
3030 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3031 vreinterpretq_p8_p16 (poly16x8_t __a)
3033 return (poly8x16_t) __a;
3036 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3037 vreinterpret_p16_f16 (float16x4_t __a)
3039 return (poly16x4_t) __a;
3042 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3043 vreinterpret_p16_f64 (float64x1_t __a)
3045 return (poly16x4_t) __a;
3048 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3049 vreinterpret_p16_s8 (int8x8_t __a)
3051 return (poly16x4_t) __a;
3054 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3055 vreinterpret_p16_s16 (int16x4_t __a)
3057 return (poly16x4_t) __a;
3060 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3061 vreinterpret_p16_s32 (int32x2_t __a)
3063 return (poly16x4_t) __a;
3066 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3067 vreinterpret_p16_s64 (int64x1_t __a)
3069 return (poly16x4_t) __a;
3072 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3073 vreinterpret_p16_f32 (float32x2_t __a)
3075 return (poly16x4_t) __a;
3078 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3079 vreinterpret_p16_u8 (uint8x8_t __a)
3081 return (poly16x4_t) __a;
3084 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3085 vreinterpret_p16_u16 (uint16x4_t __a)
3087 return (poly16x4_t) __a;
3090 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3091 vreinterpret_p16_u32 (uint32x2_t __a)
3093 return (poly16x4_t) __a;
3096 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3097 vreinterpret_p16_u64 (uint64x1_t __a)
3099 return (poly16x4_t) __a;
3102 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3103 vreinterpret_p16_p8 (poly8x8_t __a)
3105 return (poly16x4_t) __a;
3108 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3109 vreinterpretq_p16_f64 (float64x2_t __a)
3111 return (poly16x8_t) __a;
3114 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3115 vreinterpretq_p16_s8 (int8x16_t __a)
3117 return (poly16x8_t) __a;
3120 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3121 vreinterpretq_p16_s16 (int16x8_t __a)
3123 return (poly16x8_t) __a;
3126 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3127 vreinterpretq_p16_s32 (int32x4_t __a)
3129 return (poly16x8_t) __a;
3132 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3133 vreinterpretq_p16_s64 (int64x2_t __a)
3135 return (poly16x8_t) __a;
3138 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3139 vreinterpretq_p16_f16 (float16x8_t __a)
3141 return (poly16x8_t) __a;
3144 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3145 vreinterpretq_p16_f32 (float32x4_t __a)
3147 return (poly16x8_t) __a;
3150 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3151 vreinterpretq_p16_u8 (uint8x16_t __a)
3153 return (poly16x8_t) __a;
3156 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3157 vreinterpretq_p16_u16 (uint16x8_t __a)
3159 return (poly16x8_t) __a;
3162 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3163 vreinterpretq_p16_u32 (uint32x4_t __a)
3165 return (poly16x8_t) __a;
3168 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3169 vreinterpretq_p16_u64 (uint64x2_t __a)
3171 return (poly16x8_t) __a;
3174 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3175 vreinterpretq_p16_p8 (poly8x16_t __a)
3177 return (poly16x8_t) __a;
3180 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3181 vreinterpret_f16_f64 (float64x1_t __a)
3183 return (float16x4_t) __a;
3186 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3187 vreinterpret_f16_s8 (int8x8_t __a)
3189 return (float16x4_t) __a;
3192 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3193 vreinterpret_f16_s16 (int16x4_t __a)
3195 return (float16x4_t) __a;
3198 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3199 vreinterpret_f16_s32 (int32x2_t __a)
3201 return (float16x4_t) __a;
3204 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3205 vreinterpret_f16_s64 (int64x1_t __a)
3207 return (float16x4_t) __a;
3210 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3211 vreinterpret_f16_f32 (float32x2_t __a)
3213 return (float16x4_t) __a;
3216 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3217 vreinterpret_f16_u8 (uint8x8_t __a)
3219 return (float16x4_t) __a;
3222 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3223 vreinterpret_f16_u16 (uint16x4_t __a)
3225 return (float16x4_t) __a;
3228 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3229 vreinterpret_f16_u32 (uint32x2_t __a)
3231 return (float16x4_t) __a;
3234 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3235 vreinterpret_f16_u64 (uint64x1_t __a)
3237 return (float16x4_t) __a;
3240 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3241 vreinterpret_f16_p8 (poly8x8_t __a)
3243 return (float16x4_t) __a;
3246 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3247 vreinterpret_f16_p16 (poly16x4_t __a)
3249 return (float16x4_t) __a;
3252 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3253 vreinterpretq_f16_f64 (float64x2_t __a)
3255 return (float16x8_t) __a;
3258 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3259 vreinterpretq_f16_s8 (int8x16_t __a)
3261 return (float16x8_t) __a;
3264 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3265 vreinterpretq_f16_s16 (int16x8_t __a)
3267 return (float16x8_t) __a;
3270 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3271 vreinterpretq_f16_s32 (int32x4_t __a)
3273 return (float16x8_t) __a;
3276 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3277 vreinterpretq_f16_s64 (int64x2_t __a)
3279 return (float16x8_t) __a;
3282 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3283 vreinterpretq_f16_f32 (float32x4_t __a)
3285 return (float16x8_t) __a;
3288 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3289 vreinterpretq_f16_u8 (uint8x16_t __a)
3291 return (float16x8_t) __a;
3294 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3295 vreinterpretq_f16_u16 (uint16x8_t __a)
3297 return (float16x8_t) __a;
3300 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3301 vreinterpretq_f16_u32 (uint32x4_t __a)
3303 return (float16x8_t) __a;
3306 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3307 vreinterpretq_f16_u64 (uint64x2_t __a)
3309 return (float16x8_t) __a;
3312 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3313 vreinterpretq_f16_p8 (poly8x16_t __a)
3315 return (float16x8_t) __a;
3318 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3319 vreinterpretq_f16_p16 (poly16x8_t __a)
3321 return (float16x8_t) __a;
3324 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3325 vreinterpret_f32_f16 (float16x4_t __a)
3327 return (float32x2_t) __a;
3330 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3331 vreinterpret_f32_f64 (float64x1_t __a)
3333 return (float32x2_t) __a;
3336 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3337 vreinterpret_f32_s8 (int8x8_t __a)
3339 return (float32x2_t) __a;
3342 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3343 vreinterpret_f32_s16 (int16x4_t __a)
3345 return (float32x2_t) __a;
3348 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3349 vreinterpret_f32_s32 (int32x2_t __a)
3351 return (float32x2_t) __a;
3354 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3355 vreinterpret_f32_s64 (int64x1_t __a)
3357 return (float32x2_t) __a;
3360 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3361 vreinterpret_f32_u8 (uint8x8_t __a)
3363 return (float32x2_t) __a;
3366 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3367 vreinterpret_f32_u16 (uint16x4_t __a)
3369 return (float32x2_t) __a;
3372 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3373 vreinterpret_f32_u32 (uint32x2_t __a)
3375 return (float32x2_t) __a;
3378 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3379 vreinterpret_f32_u64 (uint64x1_t __a)
3381 return (float32x2_t) __a;
3384 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3385 vreinterpret_f32_p8 (poly8x8_t __a)
3387 return (float32x2_t) __a;
3390 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3391 vreinterpret_f32_p16 (poly16x4_t __a)
3393 return (float32x2_t) __a;
3396 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3397 vreinterpretq_f32_f16 (float16x8_t __a)
3399 return (float32x4_t) __a;
3402 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3403 vreinterpretq_f32_f64 (float64x2_t __a)
3405 return (float32x4_t) __a;
3408 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3409 vreinterpretq_f32_s8 (int8x16_t __a)
3411 return (float32x4_t) __a;
3414 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3415 vreinterpretq_f32_s16 (int16x8_t __a)
3417 return (float32x4_t) __a;
3420 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3421 vreinterpretq_f32_s32 (int32x4_t __a)
3423 return (float32x4_t) __a;
3426 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3427 vreinterpretq_f32_s64 (int64x2_t __a)
3429 return (float32x4_t) __a;
3432 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3433 vreinterpretq_f32_u8 (uint8x16_t __a)
3435 return (float32x4_t) __a;
3438 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3439 vreinterpretq_f32_u16 (uint16x8_t __a)
3441 return (float32x4_t) __a;
3444 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3445 vreinterpretq_f32_u32 (uint32x4_t __a)
3447 return (float32x4_t) __a;
3450 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3451 vreinterpretq_f32_u64 (uint64x2_t __a)
3453 return (float32x4_t) __a;
3456 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3457 vreinterpretq_f32_p8 (poly8x16_t __a)
3459 return (float32x4_t) __a;
3462 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3463 vreinterpretq_f32_p16 (poly16x8_t __a)
3465 return (float32x4_t) __a;
3468 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3469 vreinterpret_f64_f16 (float16x4_t __a)
3471 return (float64x1_t) __a;
3474 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3475 vreinterpret_f64_f32 (float32x2_t __a)
3477 return (float64x1_t) __a;
3480 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3481 vreinterpret_f64_p8 (poly8x8_t __a)
3483 return (float64x1_t) __a;
3486 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3487 vreinterpret_f64_p16 (poly16x4_t __a)
3489 return (float64x1_t) __a;
3492 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3493 vreinterpret_f64_s8 (int8x8_t __a)
3495 return (float64x1_t) __a;
3498 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3499 vreinterpret_f64_s16 (int16x4_t __a)
3501 return (float64x1_t) __a;
3504 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3505 vreinterpret_f64_s32 (int32x2_t __a)
3507 return (float64x1_t) __a;
3510 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3511 vreinterpret_f64_s64 (int64x1_t __a)
3513 return (float64x1_t) __a;
3516 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3517 vreinterpret_f64_u8 (uint8x8_t __a)
3519 return (float64x1_t) __a;
3522 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3523 vreinterpret_f64_u16 (uint16x4_t __a)
3525 return (float64x1_t) __a;
3528 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3529 vreinterpret_f64_u32 (uint32x2_t __a)
3531 return (float64x1_t) __a;
3534 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3535 vreinterpret_f64_u64 (uint64x1_t __a)
3537 return (float64x1_t) __a;
3540 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3541 vreinterpretq_f64_f16 (float16x8_t __a)
3543 return (float64x2_t) __a;
3546 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3547 vreinterpretq_f64_f32 (float32x4_t __a)
3549 return (float64x2_t) __a;
3552 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3553 vreinterpretq_f64_p8 (poly8x16_t __a)
3555 return (float64x2_t) __a;
3558 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3559 vreinterpretq_f64_p16 (poly16x8_t __a)
3561 return (float64x2_t) __a;
3564 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3565 vreinterpretq_f64_s8 (int8x16_t __a)
3567 return (float64x2_t) __a;
3570 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3571 vreinterpretq_f64_s16 (int16x8_t __a)
3573 return (float64x2_t) __a;
3576 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3577 vreinterpretq_f64_s32 (int32x4_t __a)
3579 return (float64x2_t) __a;
3582 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3583 vreinterpretq_f64_s64 (int64x2_t __a)
3585 return (float64x2_t) __a;
3588 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3589 vreinterpretq_f64_u8 (uint8x16_t __a)
3591 return (float64x2_t) __a;
3594 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3595 vreinterpretq_f64_u16 (uint16x8_t __a)
3597 return (float64x2_t) __a;
3600 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3601 vreinterpretq_f64_u32 (uint32x4_t __a)
3603 return (float64x2_t) __a;
3606 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3607 vreinterpretq_f64_u64 (uint64x2_t __a)
3609 return (float64x2_t) __a;
3612 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3613 vreinterpret_s64_f16 (float16x4_t __a)
3615 return (int64x1_t) __a;
3618 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3619 vreinterpret_s64_f64 (float64x1_t __a)
3621 return (int64x1_t) __a;
3624 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3625 vreinterpret_s64_s8 (int8x8_t __a)
3627 return (int64x1_t) __a;
3630 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3631 vreinterpret_s64_s16 (int16x4_t __a)
3633 return (int64x1_t) __a;
3636 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3637 vreinterpret_s64_s32 (int32x2_t __a)
3639 return (int64x1_t) __a;
3642 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3643 vreinterpret_s64_f32 (float32x2_t __a)
3645 return (int64x1_t) __a;
3648 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3649 vreinterpret_s64_u8 (uint8x8_t __a)
3651 return (int64x1_t) __a;
3654 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3655 vreinterpret_s64_u16 (uint16x4_t __a)
3657 return (int64x1_t) __a;
3660 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3661 vreinterpret_s64_u32 (uint32x2_t __a)
3663 return (int64x1_t) __a;
3666 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3667 vreinterpret_s64_u64 (uint64x1_t __a)
3669 return (int64x1_t) __a;
3672 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3673 vreinterpret_s64_p8 (poly8x8_t __a)
3675 return (int64x1_t) __a;
3678 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3679 vreinterpret_s64_p16 (poly16x4_t __a)
3681 return (int64x1_t) __a;
3684 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3685 vreinterpretq_s64_f64 (float64x2_t __a)
3687 return (int64x2_t) __a;
3690 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3691 vreinterpretq_s64_s8 (int8x16_t __a)
3693 return (int64x2_t) __a;
3696 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3697 vreinterpretq_s64_s16 (int16x8_t __a)
3699 return (int64x2_t) __a;
3702 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3703 vreinterpretq_s64_s32 (int32x4_t __a)
3705 return (int64x2_t) __a;
3708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3709 vreinterpretq_s64_f16 (float16x8_t __a)
3711 return (int64x2_t) __a;
3714 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3715 vreinterpretq_s64_f32 (float32x4_t __a)
3717 return (int64x2_t) __a;
3720 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3721 vreinterpretq_s64_u8 (uint8x16_t __a)
3723 return (int64x2_t) __a;
3726 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3727 vreinterpretq_s64_u16 (uint16x8_t __a)
3729 return (int64x2_t) __a;
3732 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3733 vreinterpretq_s64_u32 (uint32x4_t __a)
3735 return (int64x2_t) __a;
3738 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3739 vreinterpretq_s64_u64 (uint64x2_t __a)
3741 return (int64x2_t) __a;
3744 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3745 vreinterpretq_s64_p8 (poly8x16_t __a)
3747 return (int64x2_t) __a;
3750 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3751 vreinterpretq_s64_p16 (poly16x8_t __a)
3753 return (int64x2_t) __a;
3756 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3757 vreinterpret_u64_f16 (float16x4_t __a)
3759 return (uint64x1_t) __a;
3762 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3763 vreinterpret_u64_f64 (float64x1_t __a)
3765 return (uint64x1_t) __a;
3768 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3769 vreinterpret_u64_s8 (int8x8_t __a)
3771 return (uint64x1_t) __a;
3774 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3775 vreinterpret_u64_s16 (int16x4_t __a)
3777 return (uint64x1_t) __a;
3780 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3781 vreinterpret_u64_s32 (int32x2_t __a)
3783 return (uint64x1_t) __a;
3786 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3787 vreinterpret_u64_s64 (int64x1_t __a)
3789 return (uint64x1_t) __a;
3792 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3793 vreinterpret_u64_f32 (float32x2_t __a)
3795 return (uint64x1_t) __a;
3798 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3799 vreinterpret_u64_u8 (uint8x8_t __a)
3801 return (uint64x1_t) __a;
3804 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3805 vreinterpret_u64_u16 (uint16x4_t __a)
3807 return (uint64x1_t) __a;
3810 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3811 vreinterpret_u64_u32 (uint32x2_t __a)
3813 return (uint64x1_t) __a;
3816 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3817 vreinterpret_u64_p8 (poly8x8_t __a)
3819 return (uint64x1_t) __a;
3822 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3823 vreinterpret_u64_p16 (poly16x4_t __a)
3825 return (uint64x1_t) __a;
3828 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3829 vreinterpretq_u64_f64 (float64x2_t __a)
3831 return (uint64x2_t) __a;
3834 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3835 vreinterpretq_u64_s8 (int8x16_t __a)
3837 return (uint64x2_t) __a;
3840 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3841 vreinterpretq_u64_s16 (int16x8_t __a)
3843 return (uint64x2_t) __a;
3846 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3847 vreinterpretq_u64_s32 (int32x4_t __a)
3849 return (uint64x2_t) __a;
3852 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3853 vreinterpretq_u64_s64 (int64x2_t __a)
3855 return (uint64x2_t) __a;
3858 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3859 vreinterpretq_u64_f16 (float16x8_t __a)
3861 return (uint64x2_t) __a;
3864 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3865 vreinterpretq_u64_f32 (float32x4_t __a)
3867 return (uint64x2_t) __a;
3870 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3871 vreinterpretq_u64_u8 (uint8x16_t __a)
3873 return (uint64x2_t) __a;
3876 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3877 vreinterpretq_u64_u16 (uint16x8_t __a)
3879 return (uint64x2_t) __a;
3882 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3883 vreinterpretq_u64_u32 (uint32x4_t __a)
3885 return (uint64x2_t) __a;
3888 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3889 vreinterpretq_u64_p8 (poly8x16_t __a)
3891 return (uint64x2_t) __a;
3894 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3895 vreinterpretq_u64_p16 (poly16x8_t __a)
3897 return (uint64x2_t) __a;
3900 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3901 vreinterpret_s8_f16 (float16x4_t __a)
3903 return (int8x8_t) __a;
3906 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3907 vreinterpret_s8_f64 (float64x1_t __a)
3909 return (int8x8_t) __a;
3912 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3913 vreinterpret_s8_s16 (int16x4_t __a)
3915 return (int8x8_t) __a;
3918 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3919 vreinterpret_s8_s32 (int32x2_t __a)
3921 return (int8x8_t) __a;
3924 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3925 vreinterpret_s8_s64 (int64x1_t __a)
3927 return (int8x8_t) __a;
3930 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3931 vreinterpret_s8_f32 (float32x2_t __a)
3933 return (int8x8_t) __a;
3936 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3937 vreinterpret_s8_u8 (uint8x8_t __a)
3939 return (int8x8_t) __a;
3942 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3943 vreinterpret_s8_u16 (uint16x4_t __a)
3945 return (int8x8_t) __a;
3948 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3949 vreinterpret_s8_u32 (uint32x2_t __a)
3951 return (int8x8_t) __a;
3954 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3955 vreinterpret_s8_u64 (uint64x1_t __a)
3957 return (int8x8_t) __a;
3960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3961 vreinterpret_s8_p8 (poly8x8_t __a)
3963 return (int8x8_t) __a;
3966 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3967 vreinterpret_s8_p16 (poly16x4_t __a)
3969 return (int8x8_t) __a;
3972 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3973 vreinterpretq_s8_f64 (float64x2_t __a)
3975 return (int8x16_t) __a;
3978 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3979 vreinterpretq_s8_s16 (int16x8_t __a)
3981 return (int8x16_t) __a;
3984 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3985 vreinterpretq_s8_s32 (int32x4_t __a)
3987 return (int8x16_t) __a;
3990 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3991 vreinterpretq_s8_s64 (int64x2_t __a)
3993 return (int8x16_t) __a;
3996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3997 vreinterpretq_s8_f16 (float16x8_t __a)
3999 return (int8x16_t) __a;
4002 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4003 vreinterpretq_s8_f32 (float32x4_t __a)
4005 return (int8x16_t) __a;
4008 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4009 vreinterpretq_s8_u8 (uint8x16_t __a)
4011 return (int8x16_t) __a;
4014 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4015 vreinterpretq_s8_u16 (uint16x8_t __a)
4017 return (int8x16_t) __a;
4020 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4021 vreinterpretq_s8_u32 (uint32x4_t __a)
4023 return (int8x16_t) __a;
4026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4027 vreinterpretq_s8_u64 (uint64x2_t __a)
4029 return (int8x16_t) __a;
4032 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4033 vreinterpretq_s8_p8 (poly8x16_t __a)
4035 return (int8x16_t) __a;
4038 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4039 vreinterpretq_s8_p16 (poly16x8_t __a)
4041 return (int8x16_t) __a;
4044 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4045 vreinterpret_s16_f16 (float16x4_t __a)
4047 return (int16x4_t) __a;
4050 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4051 vreinterpret_s16_f64 (float64x1_t __a)
4053 return (int16x4_t) __a;
4056 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4057 vreinterpret_s16_s8 (int8x8_t __a)
4059 return (int16x4_t) __a;
4062 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4063 vreinterpret_s16_s32 (int32x2_t __a)
4065 return (int16x4_t) __a;
4068 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4069 vreinterpret_s16_s64 (int64x1_t __a)
4071 return (int16x4_t) __a;
4074 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4075 vreinterpret_s16_f32 (float32x2_t __a)
4077 return (int16x4_t) __a;
4080 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4081 vreinterpret_s16_u8 (uint8x8_t __a)
4083 return (int16x4_t) __a;
4086 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4087 vreinterpret_s16_u16 (uint16x4_t __a)
4089 return (int16x4_t) __a;
4092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4093 vreinterpret_s16_u32 (uint32x2_t __a)
4095 return (int16x4_t) __a;
4098 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4099 vreinterpret_s16_u64 (uint64x1_t __a)
4101 return (int16x4_t) __a;
4104 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4105 vreinterpret_s16_p8 (poly8x8_t __a)
4107 return (int16x4_t) __a;
4110 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4111 vreinterpret_s16_p16 (poly16x4_t __a)
4113 return (int16x4_t) __a;
4116 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4117 vreinterpretq_s16_f64 (float64x2_t __a)
4119 return (int16x8_t) __a;
4122 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4123 vreinterpretq_s16_s8 (int8x16_t __a)
4125 return (int16x8_t) __a;
4128 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4129 vreinterpretq_s16_s32 (int32x4_t __a)
4131 return (int16x8_t) __a;
4134 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4135 vreinterpretq_s16_s64 (int64x2_t __a)
4137 return (int16x8_t) __a;
4140 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4141 vreinterpretq_s16_f16 (float16x8_t __a)
4143 return (int16x8_t) __a;
4146 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4147 vreinterpretq_s16_f32 (float32x4_t __a)
4149 return (int16x8_t) __a;
4152 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4153 vreinterpretq_s16_u8 (uint8x16_t __a)
4155 return (int16x8_t) __a;
4158 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4159 vreinterpretq_s16_u16 (uint16x8_t __a)
4161 return (int16x8_t) __a;
4164 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4165 vreinterpretq_s16_u32 (uint32x4_t __a)
4167 return (int16x8_t) __a;
4170 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4171 vreinterpretq_s16_u64 (uint64x2_t __a)
4173 return (int16x8_t) __a;
4176 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4177 vreinterpretq_s16_p8 (poly8x16_t __a)
4179 return (int16x8_t) __a;
4182 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4183 vreinterpretq_s16_p16 (poly16x8_t __a)
4185 return (int16x8_t) __a;
4188 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4189 vreinterpret_s32_f16 (float16x4_t __a)
4191 return (int32x2_t) __a;
4194 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4195 vreinterpret_s32_f64 (float64x1_t __a)
4197 return (int32x2_t) __a;
4200 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4201 vreinterpret_s32_s8 (int8x8_t __a)
4203 return (int32x2_t) __a;
4206 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4207 vreinterpret_s32_s16 (int16x4_t __a)
4209 return (int32x2_t) __a;
4212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4213 vreinterpret_s32_s64 (int64x1_t __a)
4215 return (int32x2_t) __a;
4218 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4219 vreinterpret_s32_f32 (float32x2_t __a)
4221 return (int32x2_t) __a;
4224 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4225 vreinterpret_s32_u8 (uint8x8_t __a)
4227 return (int32x2_t) __a;
4230 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4231 vreinterpret_s32_u16 (uint16x4_t __a)
4233 return (int32x2_t) __a;
4236 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4237 vreinterpret_s32_u32 (uint32x2_t __a)
4239 return (int32x2_t) __a;
4242 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4243 vreinterpret_s32_u64 (uint64x1_t __a)
4245 return (int32x2_t) __a;
4248 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4249 vreinterpret_s32_p8 (poly8x8_t __a)
4251 return (int32x2_t) __a;
4254 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4255 vreinterpret_s32_p16 (poly16x4_t __a)
4257 return (int32x2_t) __a;
4260 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4261 vreinterpretq_s32_f64 (float64x2_t __a)
4263 return (int32x4_t) __a;
4266 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4267 vreinterpretq_s32_s8 (int8x16_t __a)
4269 return (int32x4_t) __a;
4272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4273 vreinterpretq_s32_s16 (int16x8_t __a)
4275 return (int32x4_t) __a;
4278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4279 vreinterpretq_s32_s64 (int64x2_t __a)
4281 return (int32x4_t) __a;
4284 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4285 vreinterpretq_s32_f16 (float16x8_t __a)
4287 return (int32x4_t) __a;
4290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4291 vreinterpretq_s32_f32 (float32x4_t __a)
4293 return (int32x4_t) __a;
4296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4297 vreinterpretq_s32_u8 (uint8x16_t __a)
4299 return (int32x4_t) __a;
4302 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4303 vreinterpretq_s32_u16 (uint16x8_t __a)
4305 return (int32x4_t) __a;
4308 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4309 vreinterpretq_s32_u32 (uint32x4_t __a)
4311 return (int32x4_t) __a;
4314 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4315 vreinterpretq_s32_u64 (uint64x2_t __a)
4317 return (int32x4_t) __a;
4320 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4321 vreinterpretq_s32_p8 (poly8x16_t __a)
4323 return (int32x4_t) __a;
4326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4327 vreinterpretq_s32_p16 (poly16x8_t __a)
4329 return (int32x4_t) __a;
4332 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4333 vreinterpret_u8_f16 (float16x4_t __a)
4335 return (uint8x8_t) __a;
4338 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4339 vreinterpret_u8_f64 (float64x1_t __a)
4341 return (uint8x8_t) __a;
4344 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4345 vreinterpret_u8_s8 (int8x8_t __a)
4347 return (uint8x8_t) __a;
4350 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4351 vreinterpret_u8_s16 (int16x4_t __a)
4353 return (uint8x8_t) __a;
4356 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4357 vreinterpret_u8_s32 (int32x2_t __a)
4359 return (uint8x8_t) __a;
4362 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4363 vreinterpret_u8_s64 (int64x1_t __a)
4365 return (uint8x8_t) __a;
4368 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4369 vreinterpret_u8_f32 (float32x2_t __a)
4371 return (uint8x8_t) __a;
4374 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4375 vreinterpret_u8_u16 (uint16x4_t __a)
4377 return (uint8x8_t) __a;
4380 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4381 vreinterpret_u8_u32 (uint32x2_t __a)
4383 return (uint8x8_t) __a;
4386 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4387 vreinterpret_u8_u64 (uint64x1_t __a)
4389 return (uint8x8_t) __a;
4392 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4393 vreinterpret_u8_p8 (poly8x8_t __a)
4395 return (uint8x8_t) __a;
4398 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4399 vreinterpret_u8_p16 (poly16x4_t __a)
4401 return (uint8x8_t) __a;
4404 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4405 vreinterpretq_u8_f64 (float64x2_t __a)
4407 return (uint8x16_t) __a;
4410 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4411 vreinterpretq_u8_s8 (int8x16_t __a)
4413 return (uint8x16_t) __a;
4416 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4417 vreinterpretq_u8_s16 (int16x8_t __a)
4419 return (uint8x16_t) __a;
4422 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4423 vreinterpretq_u8_s32 (int32x4_t __a)
4425 return (uint8x16_t) __a;
4428 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4429 vreinterpretq_u8_s64 (int64x2_t __a)
4431 return (uint8x16_t) __a;
4434 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4435 vreinterpretq_u8_f16 (float16x8_t __a)
4437 return (uint8x16_t) __a;
4440 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4441 vreinterpretq_u8_f32 (float32x4_t __a)
4443 return (uint8x16_t) __a;
4446 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4447 vreinterpretq_u8_u16 (uint16x8_t __a)
4449 return (uint8x16_t) __a;
4452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4453 vreinterpretq_u8_u32 (uint32x4_t __a)
4455 return (uint8x16_t) __a;
4458 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4459 vreinterpretq_u8_u64 (uint64x2_t __a)
4461 return (uint8x16_t) __a;
4464 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4465 vreinterpretq_u8_p8 (poly8x16_t __a)
4467 return (uint8x16_t) __a;
4470 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4471 vreinterpretq_u8_p16 (poly16x8_t __a)
4473 return (uint8x16_t) __a;
4476 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4477 vreinterpret_u16_f16 (float16x4_t __a)
4479 return (uint16x4_t) __a;
4482 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4483 vreinterpret_u16_f64 (float64x1_t __a)
4485 return (uint16x4_t) __a;
4488 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4489 vreinterpret_u16_s8 (int8x8_t __a)
4491 return (uint16x4_t) __a;
4494 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4495 vreinterpret_u16_s16 (int16x4_t __a)
4497 return (uint16x4_t) __a;
4500 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4501 vreinterpret_u16_s32 (int32x2_t __a)
4503 return (uint16x4_t) __a;
4506 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4507 vreinterpret_u16_s64 (int64x1_t __a)
4509 return (uint16x4_t) __a;
4512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4513 vreinterpret_u16_f32 (float32x2_t __a)
4515 return (uint16x4_t) __a;
4518 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4519 vreinterpret_u16_u8 (uint8x8_t __a)
4521 return (uint16x4_t) __a;
4524 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4525 vreinterpret_u16_u32 (uint32x2_t __a)
4527 return (uint16x4_t) __a;
4530 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4531 vreinterpret_u16_u64 (uint64x1_t __a)
4533 return (uint16x4_t) __a;
4536 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4537 vreinterpret_u16_p8 (poly8x8_t __a)
4539 return (uint16x4_t) __a;
4542 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4543 vreinterpret_u16_p16 (poly16x4_t __a)
4545 return (uint16x4_t) __a;
4548 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4549 vreinterpretq_u16_f64 (float64x2_t __a)
4551 return (uint16x8_t) __a;
4554 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4555 vreinterpretq_u16_s8 (int8x16_t __a)
4557 return (uint16x8_t) __a;
4560 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4561 vreinterpretq_u16_s16 (int16x8_t __a)
4563 return (uint16x8_t) __a;
4566 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4567 vreinterpretq_u16_s32 (int32x4_t __a)
4569 return (uint16x8_t) __a;
4572 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4573 vreinterpretq_u16_s64 (int64x2_t __a)
4575 return (uint16x8_t) __a;
4578 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4579 vreinterpretq_u16_f16 (float16x8_t __a)
4581 return (uint16x8_t) __a;
4584 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4585 vreinterpretq_u16_f32 (float32x4_t __a)
4587 return (uint16x8_t) __a;
4590 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4591 vreinterpretq_u16_u8 (uint8x16_t __a)
4593 return (uint16x8_t) __a;
4596 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4597 vreinterpretq_u16_u32 (uint32x4_t __a)
4599 return (uint16x8_t) __a;
4602 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4603 vreinterpretq_u16_u64 (uint64x2_t __a)
4605 return (uint16x8_t) __a;
4608 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4609 vreinterpretq_u16_p8 (poly8x16_t __a)
4611 return (uint16x8_t) __a;
4614 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4615 vreinterpretq_u16_p16 (poly16x8_t __a)
4617 return (uint16x8_t) __a;
4620 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4621 vreinterpret_u32_f16 (float16x4_t __a)
4623 return (uint32x2_t) __a;
4626 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4627 vreinterpret_u32_f64 (float64x1_t __a)
4629 return (uint32x2_t) __a;
4632 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4633 vreinterpret_u32_s8 (int8x8_t __a)
4635 return (uint32x2_t) __a;
4638 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4639 vreinterpret_u32_s16 (int16x4_t __a)
4641 return (uint32x2_t) __a;
4644 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4645 vreinterpret_u32_s32 (int32x2_t __a)
4647 return (uint32x2_t) __a;
4650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4651 vreinterpret_u32_s64 (int64x1_t __a)
4653 return (uint32x2_t) __a;
4656 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4657 vreinterpret_u32_f32 (float32x2_t __a)
4659 return (uint32x2_t) __a;
4662 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4663 vreinterpret_u32_u8 (uint8x8_t __a)
4665 return (uint32x2_t) __a;
4668 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4669 vreinterpret_u32_u16 (uint16x4_t __a)
4671 return (uint32x2_t) __a;
4674 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4675 vreinterpret_u32_u64 (uint64x1_t __a)
4677 return (uint32x2_t) __a;
4680 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4681 vreinterpret_u32_p8 (poly8x8_t __a)
4683 return (uint32x2_t) __a;
4686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4687 vreinterpret_u32_p16 (poly16x4_t __a)
4689 return (uint32x2_t) __a;
4692 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4693 vreinterpretq_u32_f64 (float64x2_t __a)
4695 return (uint32x4_t) __a;
4698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4699 vreinterpretq_u32_s8 (int8x16_t __a)
4701 return (uint32x4_t) __a;
4704 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4705 vreinterpretq_u32_s16 (int16x8_t __a)
4707 return (uint32x4_t) __a;
4710 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4711 vreinterpretq_u32_s32 (int32x4_t __a)
4713 return (uint32x4_t) __a;
4716 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4717 vreinterpretq_u32_s64 (int64x2_t __a)
4719 return (uint32x4_t) __a;
4722 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4723 vreinterpretq_u32_f16 (float16x8_t __a)
4725 return (uint32x4_t) __a;
4728 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4729 vreinterpretq_u32_f32 (float32x4_t __a)
4731 return (uint32x4_t) __a;
4734 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4735 vreinterpretq_u32_u8 (uint8x16_t __a)
4737 return (uint32x4_t) __a;
4740 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4741 vreinterpretq_u32_u16 (uint16x8_t __a)
4743 return (uint32x4_t) __a;
4746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4747 vreinterpretq_u32_u64 (uint64x2_t __a)
4749 return (uint32x4_t) __a;
4752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4753 vreinterpretq_u32_p8 (poly8x16_t __a)
4755 return (uint32x4_t) __a;
4758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4759 vreinterpretq_u32_p16 (poly16x8_t __a)
4761 return (uint32x4_t) __a;
4764 /* vset_lane */
4766 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
4767 vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
4769 return __aarch64_vset_lane_any (__elem, __vec, __index);
4772 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4773 vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
4775 return __aarch64_vset_lane_any (__elem, __vec, __index);
4778 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4779 vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
4781 return __aarch64_vset_lane_any (__elem, __vec, __index);
4784 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4785 vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
4787 return __aarch64_vset_lane_any (__elem, __vec, __index);
4790 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4791 vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
4793 return __aarch64_vset_lane_any (__elem, __vec, __index);
4796 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4797 vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
4799 return __aarch64_vset_lane_any (__elem, __vec, __index);
4802 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4803 vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
4805 return __aarch64_vset_lane_any (__elem, __vec, __index);
4808 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4809 vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
4811 return __aarch64_vset_lane_any (__elem, __vec, __index);
4814 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4815 vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
4817 return __aarch64_vset_lane_any (__elem, __vec, __index);
4820 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4821 vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
4823 return __aarch64_vset_lane_any (__elem, __vec, __index);
4826 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4827 vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
4829 return __aarch64_vset_lane_any (__elem, __vec, __index);
4832 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4833 vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
4835 return __aarch64_vset_lane_any (__elem, __vec, __index);
4838 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4839 vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
4841 return __aarch64_vset_lane_any (__elem, __vec, __index);
4844 /* vsetq_lane */
4846 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
4847 vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
4849 return __aarch64_vset_lane_any (__elem, __vec, __index);
4852 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4853 vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
4855 return __aarch64_vset_lane_any (__elem, __vec, __index);
4858 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4859 vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
4861 return __aarch64_vset_lane_any (__elem, __vec, __index);
4864 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4865 vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
4867 return __aarch64_vset_lane_any (__elem, __vec, __index);
4870 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4871 vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
4873 return __aarch64_vset_lane_any (__elem, __vec, __index);
4876 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4877 vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
4879 return __aarch64_vset_lane_any (__elem, __vec, __index);
4882 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4883 vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
4885 return __aarch64_vset_lane_any (__elem, __vec, __index);
4888 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4889 vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
4891 return __aarch64_vset_lane_any (__elem, __vec, __index);
4894 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4895 vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
4897 return __aarch64_vset_lane_any (__elem, __vec, __index);
4900 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4901 vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
4903 return __aarch64_vset_lane_any (__elem, __vec, __index);
4906 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4907 vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
4909 return __aarch64_vset_lane_any (__elem, __vec, __index);
4912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4913 vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
4915 return __aarch64_vset_lane_any (__elem, __vec, __index);
4918 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4919 vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
4921 return __aarch64_vset_lane_any (__elem, __vec, __index);
/* Shared body for the vget_low_* functions below: view the 128-bit
   argument __a as two 64-bit lanes, keep lane 0, and reinterpret it
   back to the element type __TYPE.  Undefined again after use.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);
4929 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
4930 vget_low_f16 (float16x8_t __a)
4932 __GET_LOW (f16);
4935 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4936 vget_low_f32 (float32x4_t __a)
4938 __GET_LOW (f32);
4941 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4942 vget_low_f64 (float64x2_t __a)
4944 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4947 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4948 vget_low_p8 (poly8x16_t __a)
4950 __GET_LOW (p8);
4953 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4954 vget_low_p16 (poly16x8_t __a)
4956 __GET_LOW (p16);
4959 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4960 vget_low_s8 (int8x16_t __a)
4962 __GET_LOW (s8);
4965 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4966 vget_low_s16 (int16x8_t __a)
4968 __GET_LOW (s16);
4971 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4972 vget_low_s32 (int32x4_t __a)
4974 __GET_LOW (s32);
4977 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4978 vget_low_s64 (int64x2_t __a)
4980 __GET_LOW (s64);
4983 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4984 vget_low_u8 (uint8x16_t __a)
4986 __GET_LOW (u8);
4989 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4990 vget_low_u16 (uint16x8_t __a)
4992 __GET_LOW (u16);
4995 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4996 vget_low_u32 (uint32x4_t __a)
4998 __GET_LOW (u32);
5001 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5002 vget_low_u64 (uint64x2_t __a)
5004 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
5007 #undef __GET_LOW
/* Shared body for the vget_high_* functions below: view the 128-bit
   argument __a as two 64-bit lanes, keep lane 1, and reinterpret it
   back to the element type __TYPE.  Undefined again after use.  */
#define __GET_HIGH(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));  \
  return vreinterpret_##__TYPE##_u64 (hi);
5014 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
5015 vget_high_f16 (float16x8_t __a)
5017 __GET_HIGH (f16);
5020 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5021 vget_high_f32 (float32x4_t __a)
5023 __GET_HIGH (f32);
5026 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5027 vget_high_f64 (float64x2_t __a)
5029 __GET_HIGH (f64);
5032 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5033 vget_high_p8 (poly8x16_t __a)
5035 __GET_HIGH (p8);
5038 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5039 vget_high_p16 (poly16x8_t __a)
5041 __GET_HIGH (p16);
5044 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5045 vget_high_s8 (int8x16_t __a)
5047 __GET_HIGH (s8);
5050 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5051 vget_high_s16 (int16x8_t __a)
5053 __GET_HIGH (s16);
5056 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5057 vget_high_s32 (int32x4_t __a)
5059 __GET_HIGH (s32);
5062 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5063 vget_high_s64 (int64x2_t __a)
5065 __GET_HIGH (s64);
5068 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5069 vget_high_u8 (uint8x16_t __a)
5071 __GET_HIGH (u8);
5074 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5075 vget_high_u16 (uint16x8_t __a)
5077 __GET_HIGH (u16);
5080 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5081 vget_high_u32 (uint32x4_t __a)
5083 __GET_HIGH (u32);
5086 #undef __GET_HIGH
5088 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5089 vget_high_u64 (uint64x2_t __a)
5091 return vcreate_u64 (vgetq_lane_u64 (__a, 1));
5094 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5095 vcombine_s8 (int8x8_t __a, int8x8_t __b)
5097 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
5100 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5101 vcombine_s16 (int16x4_t __a, int16x4_t __b)
5103 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
5106 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5107 vcombine_s32 (int32x2_t __a, int32x2_t __b)
5109 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
5112 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5113 vcombine_s64 (int64x1_t __a, int64x1_t __b)
5115 return __builtin_aarch64_combinedi (__a[0], __b[0]);
5118 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
5119 vcombine_f16 (float16x4_t __a, float16x4_t __b)
5121 return __builtin_aarch64_combinev4hf (__a, __b);
5124 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5125 vcombine_f32 (float32x2_t __a, float32x2_t __b)
5127 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
5130 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5131 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
5133 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
5134 (int8x8_t) __b);
5137 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5138 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
5140 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
5141 (int16x4_t) __b);
5144 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5145 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
5147 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
5148 (int32x2_t) __b);
5151 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5152 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
5154 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
5157 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5158 vcombine_f64 (float64x1_t __a, float64x1_t __b)
5160 return __builtin_aarch64_combinedf (__a[0], __b[0]);
5163 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5164 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
5166 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
5167 (int8x8_t) __b);
5170 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
5171 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
5173 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
5174 (int16x4_t) __b);
5177 /* Start of temporary inline asm implementations. */
5179 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5180 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
5182 int8x8_t result;
5183 __asm__ ("saba %0.8b,%2.8b,%3.8b"
5184 : "=w"(result)
5185 : "0"(a), "w"(b), "w"(c)
5186 : /* No clobbers */);
5187 return result;
5190 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5191 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
5193 int16x4_t result;
5194 __asm__ ("saba %0.4h,%2.4h,%3.4h"
5195 : "=w"(result)
5196 : "0"(a), "w"(b), "w"(c)
5197 : /* No clobbers */);
5198 return result;
5201 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5202 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
5204 int32x2_t result;
5205 __asm__ ("saba %0.2s,%2.2s,%3.2s"
5206 : "=w"(result)
5207 : "0"(a), "w"(b), "w"(c)
5208 : /* No clobbers */);
5209 return result;
5212 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5213 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
5215 uint8x8_t result;
5216 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
5217 : "=w"(result)
5218 : "0"(a), "w"(b), "w"(c)
5219 : /* No clobbers */);
5220 return result;
5223 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5224 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
5226 uint16x4_t result;
5227 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
5228 : "=w"(result)
5229 : "0"(a), "w"(b), "w"(c)
5230 : /* No clobbers */);
5231 return result;
5234 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5235 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
5237 uint32x2_t result;
5238 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
5239 : "=w"(result)
5240 : "0"(a), "w"(b), "w"(c)
5241 : /* No clobbers */);
5242 return result;
5245 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5246 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
5248 int16x8_t result;
5249 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
5250 : "=w"(result)
5251 : "0"(a), "w"(b), "w"(c)
5252 : /* No clobbers */);
5253 return result;
5256 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5257 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
5259 int32x4_t result;
5260 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
5261 : "=w"(result)
5262 : "0"(a), "w"(b), "w"(c)
5263 : /* No clobbers */);
5264 return result;
5267 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5268 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
5270 int64x2_t result;
5271 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
5272 : "=w"(result)
5273 : "0"(a), "w"(b), "w"(c)
5274 : /* No clobbers */);
5275 return result;
5278 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5279 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
5281 uint16x8_t result;
5282 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
5283 : "=w"(result)
5284 : "0"(a), "w"(b), "w"(c)
5285 : /* No clobbers */);
5286 return result;
5289 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5290 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
5292 uint32x4_t result;
5293 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
5294 : "=w"(result)
5295 : "0"(a), "w"(b), "w"(c)
5296 : /* No clobbers */);
5297 return result;
5300 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5301 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
5303 uint64x2_t result;
5304 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
5305 : "=w"(result)
5306 : "0"(a), "w"(b), "w"(c)
5307 : /* No clobbers */);
5308 return result;
5311 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5312 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
5314 int16x8_t result;
5315 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
5316 : "=w"(result)
5317 : "0"(a), "w"(b), "w"(c)
5318 : /* No clobbers */);
5319 return result;
5322 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5323 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
5325 int32x4_t result;
5326 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
5327 : "=w"(result)
5328 : "0"(a), "w"(b), "w"(c)
5329 : /* No clobbers */);
5330 return result;
5333 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5334 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
5336 int64x2_t result;
5337 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
5338 : "=w"(result)
5339 : "0"(a), "w"(b), "w"(c)
5340 : /* No clobbers */);
5341 return result;
5344 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5345 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
5347 uint16x8_t result;
5348 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
5349 : "=w"(result)
5350 : "0"(a), "w"(b), "w"(c)
5351 : /* No clobbers */);
5352 return result;
5355 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5356 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
5358 uint32x4_t result;
5359 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
5360 : "=w"(result)
5361 : "0"(a), "w"(b), "w"(c)
5362 : /* No clobbers */);
5363 return result;
5366 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5367 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
5369 uint64x2_t result;
5370 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
5371 : "=w"(result)
5372 : "0"(a), "w"(b), "w"(c)
5373 : /* No clobbers */);
5374 return result;
5377 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5378 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
5380 int8x16_t result;
5381 __asm__ ("saba %0.16b,%2.16b,%3.16b"
5382 : "=w"(result)
5383 : "0"(a), "w"(b), "w"(c)
5384 : /* No clobbers */);
5385 return result;
5388 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5389 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
5391 int16x8_t result;
5392 __asm__ ("saba %0.8h,%2.8h,%3.8h"
5393 : "=w"(result)
5394 : "0"(a), "w"(b), "w"(c)
5395 : /* No clobbers */);
5396 return result;
5399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5400 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
5402 int32x4_t result;
5403 __asm__ ("saba %0.4s,%2.4s,%3.4s"
5404 : "=w"(result)
5405 : "0"(a), "w"(b), "w"(c)
5406 : /* No clobbers */);
5407 return result;
5410 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5411 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5413 uint8x16_t result;
5414 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
5415 : "=w"(result)
5416 : "0"(a), "w"(b), "w"(c)
5417 : /* No clobbers */);
5418 return result;
5421 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5422 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5424 uint16x8_t result;
5425 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
5426 : "=w"(result)
5427 : "0"(a), "w"(b), "w"(c)
5428 : /* No clobbers */);
5429 return result;
5432 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5433 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5435 uint32x4_t result;
5436 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
5437 : "=w"(result)
5438 : "0"(a), "w"(b), "w"(c)
5439 : /* No clobbers */);
5440 return result;
5443 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5444 vabd_f32 (float32x2_t a, float32x2_t b)
5446 float32x2_t result;
5447 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
5448 : "=w"(result)
5449 : "w"(a), "w"(b)
5450 : /* No clobbers */);
5451 return result;
5454 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5455 vabd_s8 (int8x8_t a, int8x8_t b)
5457 int8x8_t result;
5458 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
5459 : "=w"(result)
5460 : "w"(a), "w"(b)
5461 : /* No clobbers */);
5462 return result;
5465 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5466 vabd_s16 (int16x4_t a, int16x4_t b)
5468 int16x4_t result;
5469 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
5470 : "=w"(result)
5471 : "w"(a), "w"(b)
5472 : /* No clobbers */);
5473 return result;
5476 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5477 vabd_s32 (int32x2_t a, int32x2_t b)
5479 int32x2_t result;
5480 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
5481 : "=w"(result)
5482 : "w"(a), "w"(b)
5483 : /* No clobbers */);
5484 return result;
5487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5488 vabd_u8 (uint8x8_t a, uint8x8_t b)
5490 uint8x8_t result;
5491 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
5492 : "=w"(result)
5493 : "w"(a), "w"(b)
5494 : /* No clobbers */);
5495 return result;
5498 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5499 vabd_u16 (uint16x4_t a, uint16x4_t b)
5501 uint16x4_t result;
5502 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
5503 : "=w"(result)
5504 : "w"(a), "w"(b)
5505 : /* No clobbers */);
5506 return result;
5509 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5510 vabd_u32 (uint32x2_t a, uint32x2_t b)
5512 uint32x2_t result;
5513 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
5514 : "=w"(result)
5515 : "w"(a), "w"(b)
5516 : /* No clobbers */);
5517 return result;
5520 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
5521 vabdd_f64 (float64_t a, float64_t b)
5523 float64_t result;
5524 __asm__ ("fabd %d0, %d1, %d2"
5525 : "=w"(result)
5526 : "w"(a), "w"(b)
5527 : /* No clobbers */);
5528 return result;
5531 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5532 vabdl_high_s8 (int8x16_t a, int8x16_t b)
5534 int16x8_t result;
5535 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
5536 : "=w"(result)
5537 : "w"(a), "w"(b)
5538 : /* No clobbers */);
5539 return result;
5542 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5543 vabdl_high_s16 (int16x8_t a, int16x8_t b)
5545 int32x4_t result;
5546 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
5547 : "=w"(result)
5548 : "w"(a), "w"(b)
5549 : /* No clobbers */);
5550 return result;
5553 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5554 vabdl_high_s32 (int32x4_t a, int32x4_t b)
5556 int64x2_t result;
5557 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
5558 : "=w"(result)
5559 : "w"(a), "w"(b)
5560 : /* No clobbers */);
5561 return result;
5564 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5565 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
5567 uint16x8_t result;
5568 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
5569 : "=w"(result)
5570 : "w"(a), "w"(b)
5571 : /* No clobbers */);
5572 return result;
5575 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5576 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
5578 uint32x4_t result;
5579 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
5580 : "=w"(result)
5581 : "w"(a), "w"(b)
5582 : /* No clobbers */);
5583 return result;
5586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5587 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
5589 uint64x2_t result;
5590 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
5591 : "=w"(result)
5592 : "w"(a), "w"(b)
5593 : /* No clobbers */);
5594 return result;
5597 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5598 vabdl_s8 (int8x8_t a, int8x8_t b)
5600 int16x8_t result;
5601 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
5602 : "=w"(result)
5603 : "w"(a), "w"(b)
5604 : /* No clobbers */);
5605 return result;
5608 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5609 vabdl_s16 (int16x4_t a, int16x4_t b)
5611 int32x4_t result;
5612 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
5613 : "=w"(result)
5614 : "w"(a), "w"(b)
5615 : /* No clobbers */);
5616 return result;
5619 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5620 vabdl_s32 (int32x2_t a, int32x2_t b)
5622 int64x2_t result;
5623 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
5624 : "=w"(result)
5625 : "w"(a), "w"(b)
5626 : /* No clobbers */);
5627 return result;
5630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5631 vabdl_u8 (uint8x8_t a, uint8x8_t b)
5633 uint16x8_t result;
5634 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
5635 : "=w"(result)
5636 : "w"(a), "w"(b)
5637 : /* No clobbers */);
5638 return result;
5641 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5642 vabdl_u16 (uint16x4_t a, uint16x4_t b)
5644 uint32x4_t result;
5645 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
5646 : "=w"(result)
5647 : "w"(a), "w"(b)
5648 : /* No clobbers */);
5649 return result;
5652 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5653 vabdl_u32 (uint32x2_t a, uint32x2_t b)
5655 uint64x2_t result;
5656 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
5657 : "=w"(result)
5658 : "w"(a), "w"(b)
5659 : /* No clobbers */);
5660 return result;
5663 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5664 vabdq_f32 (float32x4_t a, float32x4_t b)
5666 float32x4_t result;
5667 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
5668 : "=w"(result)
5669 : "w"(a), "w"(b)
5670 : /* No clobbers */);
5671 return result;
5674 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5675 vabdq_f64 (float64x2_t a, float64x2_t b)
5677 float64x2_t result;
5678 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
5679 : "=w"(result)
5680 : "w"(a), "w"(b)
5681 : /* No clobbers */);
5682 return result;
5685 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5686 vabdq_s8 (int8x16_t a, int8x16_t b)
5688 int8x16_t result;
5689 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
5690 : "=w"(result)
5691 : "w"(a), "w"(b)
5692 : /* No clobbers */);
5693 return result;
5696 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5697 vabdq_s16 (int16x8_t a, int16x8_t b)
5699 int16x8_t result;
5700 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
5701 : "=w"(result)
5702 : "w"(a), "w"(b)
5703 : /* No clobbers */);
5704 return result;
5707 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5708 vabdq_s32 (int32x4_t a, int32x4_t b)
5710 int32x4_t result;
5711 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
5712 : "=w"(result)
5713 : "w"(a), "w"(b)
5714 : /* No clobbers */);
5715 return result;
5718 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5719 vabdq_u8 (uint8x16_t a, uint8x16_t b)
5721 uint8x16_t result;
5722 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
5723 : "=w"(result)
5724 : "w"(a), "w"(b)
5725 : /* No clobbers */);
5726 return result;
5729 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5730 vabdq_u16 (uint16x8_t a, uint16x8_t b)
5732 uint16x8_t result;
5733 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
5734 : "=w"(result)
5735 : "w"(a), "w"(b)
5736 : /* No clobbers */);
5737 return result;
5740 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5741 vabdq_u32 (uint32x4_t a, uint32x4_t b)
5743 uint32x4_t result;
5744 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
5745 : "=w"(result)
5746 : "w"(a), "w"(b)
5747 : /* No clobbers */);
5748 return result;
5751 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5752 vabds_f32 (float32_t a, float32_t b)
5754 float32_t result;
5755 __asm__ ("fabd %s0, %s1, %s2"
5756 : "=w"(result)
5757 : "w"(a), "w"(b)
5758 : /* No clobbers */);
5759 return result;
5762 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5763 vaddlv_s8 (int8x8_t a)
5765 int16_t result;
5766 __asm__ ("saddlv %h0,%1.8b"
5767 : "=w"(result)
5768 : "w"(a)
5769 : /* No clobbers */);
5770 return result;
5773 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5774 vaddlv_s16 (int16x4_t a)
5776 int32_t result;
5777 __asm__ ("saddlv %s0,%1.4h"
5778 : "=w"(result)
5779 : "w"(a)
5780 : /* No clobbers */);
5781 return result;
5784 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5785 vaddlv_u8 (uint8x8_t a)
5787 uint16_t result;
5788 __asm__ ("uaddlv %h0,%1.8b"
5789 : "=w"(result)
5790 : "w"(a)
5791 : /* No clobbers */);
5792 return result;
5795 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5796 vaddlv_u16 (uint16x4_t a)
5798 uint32_t result;
5799 __asm__ ("uaddlv %s0,%1.4h"
5800 : "=w"(result)
5801 : "w"(a)
5802 : /* No clobbers */);
5803 return result;
5806 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5807 vaddlvq_s8 (int8x16_t a)
5809 int16_t result;
5810 __asm__ ("saddlv %h0,%1.16b"
5811 : "=w"(result)
5812 : "w"(a)
5813 : /* No clobbers */);
5814 return result;
5817 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5818 vaddlvq_s16 (int16x8_t a)
5820 int32_t result;
5821 __asm__ ("saddlv %s0,%1.8h"
5822 : "=w"(result)
5823 : "w"(a)
5824 : /* No clobbers */);
5825 return result;
5828 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5829 vaddlvq_s32 (int32x4_t a)
5831 int64_t result;
5832 __asm__ ("saddlv %d0,%1.4s"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5839 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5840 vaddlvq_u8 (uint8x16_t a)
5842 uint16_t result;
5843 __asm__ ("uaddlv %h0,%1.16b"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5850 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5851 vaddlvq_u16 (uint16x8_t a)
5853 uint32_t result;
5854 __asm__ ("uaddlv %s0,%1.8h"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5861 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5862 vaddlvq_u32 (uint32x4_t a)
5864 uint64_t result;
5865 __asm__ ("uaddlv %d0,%1.4s"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
/* vcopyq_lane_*: copy lane d of vector c into lane b of vector a
   (INS).  Implemented as statement-expression macros because the lane
   numbers must be immediate ("i") operands.  Temporary inline-asm
   implementations.  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5989 #define vcopyq_lane_u16(a, b, c, d) \
5990 __extension__ \
5991 ({ \
5992 uint16x8_t c_ = (c); \
5993 uint16x8_t a_ = (a); \
5994 uint16x8_t result; \
5995 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5996 : "=w"(result) \
5997 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5998 : /* No clobbers */); \
5999 result; \
6002 #define vcopyq_lane_u32(a, b, c, d) \
6003 __extension__ \
6004 ({ \
6005 uint32x4_t c_ = (c); \
6006 uint32x4_t a_ = (a); \
6007 uint32x4_t result; \
6008 __asm__ ("ins %0.s[%2], %3.s[%4]" \
6009 : "=w"(result) \
6010 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
6011 : /* No clobbers */); \
6012 result; \
6015 #define vcopyq_lane_u64(a, b, c, d) \
6016 __extension__ \
6017 ({ \
6018 uint64x2_t c_ = (c); \
6019 uint64x2_t a_ = (a); \
6020 uint64x2_t result; \
6021 __asm__ ("ins %0.d[%2], %3.d[%4]" \
6022 : "=w"(result) \
6023 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
6024 : /* No clobbers */); \
6025 result; \
/* vcvt_n_* / vcvtd_n_*: fixed-point <-> floating-point conversions with B
   fraction bits.  B must be an integer constant expression ("i"
   constraint), hence the macro form.  SCVTF/UCVTF convert from
   signed/unsigned fixed-point to float; FCVTZS/FCVTZU convert float to
   fixed-point, rounding toward zero.  The vcvtd_n_* forms operate on a
   single 64-bit scalar in a D register.  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvtq_n_* / vcvts_n_*: Q-register and single-precision-scalar variants
   of the fixed-point <-> floating-point conversions with B fraction bits.
   B must be an integer constant expression ("i" constraint), hence the
   macro form.  FCVTZS/FCVTZU round toward zero.  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
6268 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6269 vcvtx_f32_f64 (float64x2_t a)
6271 float32x2_t result;
6272 __asm__ ("fcvtxn %0.2s,%1.2d"
6273 : "=w"(result)
6274 : "w"(a)
6275 : /* No clobbers */);
6276 return result;
6279 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6280 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
6282 float32x4_t result;
6283 __asm__ ("fcvtxn2 %0.4s,%1.2d"
6284 : "=w"(result)
6285 : "w" (b), "0"(a)
6286 : /* No clobbers */);
6287 return result;
6290 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
6291 vcvtxd_f32_f64 (float64_t a)
6293 float32_t result;
6294 __asm__ ("fcvtxn %s0,%d1"
6295 : "=w"(result)
6296 : "w"(a)
6297 : /* No clobbers */);
6298 return result;
6301 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6302 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6304 float32x2_t result;
6305 float32x2_t t1;
6306 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
6307 : "=w"(result), "=w"(t1)
6308 : "0"(a), "w"(b), "w"(c)
6309 : /* No clobbers */);
6310 return result;
6313 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6314 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6316 int16x4_t result;
6317 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6318 : "=w"(result)
6319 : "0"(a), "w"(b), "x"(c)
6320 : /* No clobbers */);
6321 return result;
6324 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6325 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6327 int32x2_t result;
6328 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6329 : "=w"(result)
6330 : "0"(a), "w"(b), "w"(c)
6331 : /* No clobbers */);
6332 return result;
6335 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6336 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6338 uint16x4_t result;
6339 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
6340 : "=w"(result)
6341 : "0"(a), "w"(b), "x"(c)
6342 : /* No clobbers */);
6343 return result;
6346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6347 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6349 uint32x2_t result;
6350 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
6351 : "=w"(result)
6352 : "0"(a), "w"(b), "w"(c)
6353 : /* No clobbers */);
6354 return result;
6357 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6358 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6360 int8x8_t result;
6361 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6362 : "=w"(result)
6363 : "0"(a), "w"(b), "w"(c)
6364 : /* No clobbers */);
6365 return result;
6368 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6369 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6371 int16x4_t result;
6372 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6373 : "=w"(result)
6374 : "0"(a), "w"(b), "w"(c)
6375 : /* No clobbers */);
6376 return result;
6379 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6380 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6382 int32x2_t result;
6383 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6384 : "=w"(result)
6385 : "0"(a), "w"(b), "w"(c)
6386 : /* No clobbers */);
6387 return result;
6390 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6391 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6393 uint8x8_t result;
6394 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6395 : "=w"(result)
6396 : "0"(a), "w"(b), "w"(c)
6397 : /* No clobbers */);
6398 return result;
6401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6402 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6404 uint16x4_t result;
6405 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6406 : "=w"(result)
6407 : "0"(a), "w"(b), "w"(c)
6408 : /* No clobbers */);
6409 return result;
6412 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6413 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6415 uint32x2_t result;
6416 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6417 : "=w"(result)
6418 : "0"(a), "w"(b), "w"(c)
6419 : /* No clobbers */);
6420 return result;
/* vmlal_high_lane(q)_*: widening multiply-accumulate of the HIGH half of
   B by lane D of C (SMLAL2/UMLAL2).  The _laneq_ variants take the lane
   from a full Q register.  D must be an integer constant expression,
   hence the macro form.  "x" restricts 16-bit element vectors to V0-V15,
   the registers addressable by the by-element form.  */

#define vmlal_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
6535 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6536 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6538 int32x4_t result;
6539 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6540 : "=w"(result)
6541 : "0"(a), "w"(b), "x"(c)
6542 : /* No clobbers */);
6543 return result;
6546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6547 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6549 int64x2_t result;
6550 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6551 : "=w"(result)
6552 : "0"(a), "w"(b), "w"(c)
6553 : /* No clobbers */);
6554 return result;
6557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6558 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6560 uint32x4_t result;
6561 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6562 : "=w"(result)
6563 : "0"(a), "w"(b), "x"(c)
6564 : /* No clobbers */);
6565 return result;
6568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6569 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6571 uint64x2_t result;
6572 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6573 : "=w"(result)
6574 : "0"(a), "w"(b), "w"(c)
6575 : /* No clobbers */);
6576 return result;
6579 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6580 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6582 int16x8_t result;
6583 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6584 : "=w"(result)
6585 : "0"(a), "w"(b), "w"(c)
6586 : /* No clobbers */);
6587 return result;
6590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6591 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6593 int32x4_t result;
6594 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6595 : "=w"(result)
6596 : "0"(a), "w"(b), "w"(c)
6597 : /* No clobbers */);
6598 return result;
6601 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6602 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6604 int64x2_t result;
6605 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6606 : "=w"(result)
6607 : "0"(a), "w"(b), "w"(c)
6608 : /* No clobbers */);
6609 return result;
6612 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6613 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6615 uint16x8_t result;
6616 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6617 : "=w"(result)
6618 : "0"(a), "w"(b), "w"(c)
6619 : /* No clobbers */);
6620 return result;
6623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6624 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6626 uint32x4_t result;
6627 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6628 : "=w"(result)
6629 : "0"(a), "w"(b), "w"(c)
6630 : /* No clobbers */);
6631 return result;
6634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6635 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6637 uint64x2_t result;
6638 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6639 : "=w"(result)
6640 : "0"(a), "w"(b), "w"(c)
6641 : /* No clobbers */);
6642 return result;
/* vmlal_lane(q)_*: widening multiply-accumulate of D-register B by lane
   D of C (SMLAL/UMLAL by element).  The _laneq_ variants take the lane
   from a full Q register.  D must be an integer constant expression,
   hence the macro form.  */

#define vmlal_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
6757 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6758 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
6760 int32x4_t result;
6761 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
6762 : "=w"(result)
6763 : "0"(a), "w"(b), "x"(c)
6764 : /* No clobbers */);
6765 return result;
6768 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6769 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
6771 int64x2_t result;
6772 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
6773 : "=w"(result)
6774 : "0"(a), "w"(b), "w"(c)
6775 : /* No clobbers */);
6776 return result;
6779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6780 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
6782 uint32x4_t result;
6783 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
6784 : "=w"(result)
6785 : "0"(a), "w"(b), "x"(c)
6786 : /* No clobbers */);
6787 return result;
6790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6791 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
6793 uint64x2_t result;
6794 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
6795 : "=w"(result)
6796 : "0"(a), "w"(b), "w"(c)
6797 : /* No clobbers */);
6798 return result;
6801 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6802 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
6804 int16x8_t result;
6805 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
6806 : "=w"(result)
6807 : "0"(a), "w"(b), "w"(c)
6808 : /* No clobbers */);
6809 return result;
6812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6813 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
6815 int32x4_t result;
6816 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
6817 : "=w"(result)
6818 : "0"(a), "w"(b), "w"(c)
6819 : /* No clobbers */);
6820 return result;
6823 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6824 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
6826 int64x2_t result;
6827 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
6828 : "=w"(result)
6829 : "0"(a), "w"(b), "w"(c)
6830 : /* No clobbers */);
6831 return result;
6834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6835 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
6837 uint16x8_t result;
6838 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
6839 : "=w"(result)
6840 : "0"(a), "w"(b), "w"(c)
6841 : /* No clobbers */);
6842 return result;
6845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6846 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
6848 uint32x4_t result;
6849 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
6850 : "=w"(result)
6851 : "0"(a), "w"(b), "w"(c)
6852 : /* No clobbers */);
6853 return result;
6856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6857 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
6859 uint64x2_t result;
6860 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
6861 : "=w"(result)
6862 : "0"(a), "w"(b), "w"(c)
6863 : /* No clobbers */);
6864 return result;
6867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6868 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6870 float32x4_t result;
6871 float32x4_t t1;
6872 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
6873 : "=w"(result), "=w"(t1)
6874 : "0"(a), "w"(b), "w"(c)
6875 : /* No clobbers */);
6876 return result;
6879 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6880 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
6882 int16x8_t result;
6883 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6884 : "=w"(result)
6885 : "0"(a), "w"(b), "x"(c)
6886 : /* No clobbers */);
6887 return result;
6890 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6891 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
6893 int32x4_t result;
6894 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6895 : "=w"(result)
6896 : "0"(a), "w"(b), "w"(c)
6897 : /* No clobbers */);
6898 return result;
6901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6902 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
6904 uint16x8_t result;
6905 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6906 : "=w"(result)
6907 : "0"(a), "w"(b), "x"(c)
6908 : /* No clobbers */);
6909 return result;
6912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6913 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
6915 uint32x4_t result;
6916 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6917 : "=w"(result)
6918 : "0"(a), "w"(b), "w"(c)
6919 : /* No clobbers */);
6920 return result;
6923 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6924 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
6926 int8x16_t result;
6927 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6928 : "=w"(result)
6929 : "0"(a), "w"(b), "w"(c)
6930 : /* No clobbers */);
6931 return result;
6934 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6935 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
6937 int16x8_t result;
6938 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6939 : "=w"(result)
6940 : "0"(a), "w"(b), "w"(c)
6941 : /* No clobbers */);
6942 return result;
6945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6946 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
6948 int32x4_t result;
6949 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6950 : "=w"(result)
6951 : "0"(a), "w"(b), "w"(c)
6952 : /* No clobbers */);
6953 return result;
6956 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6957 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
6959 uint8x16_t result;
6960 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6961 : "=w"(result)
6962 : "0"(a), "w"(b), "w"(c)
6963 : /* No clobbers */);
6964 return result;
6967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6968 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
6970 uint16x8_t result;
6971 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6972 : "=w"(result)
6973 : "0"(a), "w"(b), "w"(c)
6974 : /* No clobbers */);
6975 return result;
6978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6979 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
6981 uint32x4_t result;
6982 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6983 : "=w"(result)
6984 : "0"(a), "w"(b), "w"(c)
6985 : /* No clobbers */);
6986 return result;
6989 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6990 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6992 float32x2_t result;
6993 float32x2_t t1;
6994 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
6995 : "=w"(result), "=w"(t1)
6996 : "0"(a), "w"(b), "w"(c)
6997 : /* No clobbers */);
6998 return result;
7001 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7002 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7004 int16x4_t result;
7005 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7006 : "=w"(result)
7007 : "0"(a), "w"(b), "x"(c)
7008 : /* No clobbers */);
7009 return result;
7012 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7013 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7015 int32x2_t result;
7016 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7017 : "=w"(result)
7018 : "0"(a), "w"(b), "w"(c)
7019 : /* No clobbers */);
7020 return result;
7023 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7024 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7026 uint16x4_t result;
7027 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7028 : "=w"(result)
7029 : "0"(a), "w"(b), "x"(c)
7030 : /* No clobbers */);
7031 return result;
7034 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7035 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7037 uint32x2_t result;
7038 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7039 : "=w"(result)
7040 : "0"(a), "w"(b), "w"(c)
7041 : /* No clobbers */);
7042 return result;
7045 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7046 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7048 int8x8_t result;
7049 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7050 : "=w"(result)
7051 : "0"(a), "w"(b), "w"(c)
7052 : /* No clobbers */);
7053 return result;
7056 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7057 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7059 int16x4_t result;
7060 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7061 : "=w"(result)
7062 : "0"(a), "w"(b), "w"(c)
7063 : /* No clobbers */);
7064 return result;
7067 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7068 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7070 int32x2_t result;
7071 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7072 : "=w"(result)
7073 : "0"(a), "w"(b), "w"(c)
7074 : /* No clobbers */);
7075 return result;
7078 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7079 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7081 uint8x8_t result;
7082 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7083 : "=w"(result)
7084 : "0"(a), "w"(b), "w"(c)
7085 : /* No clobbers */);
7086 return result;
7089 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7090 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7092 uint16x4_t result;
7093 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7094 : "=w"(result)
7095 : "0"(a), "w"(b), "w"(c)
7096 : /* No clobbers */);
7097 return result;
7100 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7101 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7103 uint32x2_t result;
7104 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7105 : "=w"(result)
7106 : "0"(a), "w"(b), "w"(c)
7107 : /* No clobbers */);
7108 return result;
/* vmlsl_high_lane(q)_*: widening multiply-subtract of the HIGH half of B
   by lane D of C (SMLSL2/UMLSL2).  The _laneq_ variants take the lane
   from a full Q register.  D must be an integer constant expression,
   hence the macro form.  */

#define vmlsl_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlsl_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vmlsl_high_n / vmlsl_high: widening multiply-subtract long on the high
   halves of the 128-bit sources (SMLSL2/UMLSL2).  The _n forms multiply by
   a scalar broadcast from element 0; the accumulator is tied to the output
   via the "0" matching constraint.  16-bit scalar operands use "x"
   (by-element form limited to V0-V15).  */
7223 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7224 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7226 int32x4_t result;
7227 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7228 : "=w"(result)
7229 : "0"(a), "w"(b), "x"(c)
7230 : /* No clobbers */);
7231 return result;
7234 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7235 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7237 int64x2_t result;
7238 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7239 : "=w"(result)
7240 : "0"(a), "w"(b), "w"(c)
7241 : /* No clobbers */);
7242 return result;
7245 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7246 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7248 uint32x4_t result;
7249 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7250 : "=w"(result)
7251 : "0"(a), "w"(b), "x"(c)
7252 : /* No clobbers */);
7253 return result;
7256 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7257 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7259 uint64x2_t result;
7260 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7261 : "=w"(result)
7262 : "0"(a), "w"(b), "w"(c)
7263 : /* No clobbers */);
7264 return result;
7267 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7268 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7270 int16x8_t result;
7271 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7272 : "=w"(result)
7273 : "0"(a), "w"(b), "w"(c)
7274 : /* No clobbers */);
7275 return result;
7278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7279 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7281 int32x4_t result;
7282 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7283 : "=w"(result)
7284 : "0"(a), "w"(b), "w"(c)
7285 : /* No clobbers */);
7286 return result;
7289 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7290 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7292 int64x2_t result;
7293 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7294 : "=w"(result)
7295 : "0"(a), "w"(b), "w"(c)
7296 : /* No clobbers */);
7297 return result;
7300 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7301 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7303 uint16x8_t result;
7304 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7305 : "=w"(result)
7306 : "0"(a), "w"(b), "w"(c)
7307 : /* No clobbers */);
7308 return result;
7311 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7312 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7314 uint32x4_t result;
7315 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7316 : "=w"(result)
7317 : "0"(a), "w"(b), "w"(c)
7318 : /* No clobbers */);
7319 return result;
7322 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7323 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7325 uint64x2_t result;
7326 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7327 : "=w"(result)
7328 : "0"(a), "w"(b), "w"(c)
7329 : /* No clobbers */);
7330 return result;
/* vmlsl_lane / vmlsl_laneq: widening multiply-subtract long of the 64-bit
   multiplicand by a selected lane (SMLSL/UMLSL by-element).  laneq variants
   index a 128-bit lane vector.  Macros so the lane index stays an immediate;
   16-bit lane operands use "x" (V0-V15 restriction for .h by-element).  */
7333 #define vmlsl_lane_s16(a, b, c, d) \
7334 __extension__ \
7335 ({ \
7336 int16x4_t c_ = (c); \
7337 int16x4_t b_ = (b); \
7338 int32x4_t a_ = (a); \
7339 int32x4_t result; \
7340 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7341 : "=w"(result) \
7342 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7343 : /* No clobbers */); \
7344 result; \
7347 #define vmlsl_lane_s32(a, b, c, d) \
7348 __extension__ \
7349 ({ \
7350 int32x2_t c_ = (c); \
7351 int32x2_t b_ = (b); \
7352 int64x2_t a_ = (a); \
7353 int64x2_t result; \
7354 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7355 : "=w"(result) \
7356 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7357 : /* No clobbers */); \
7358 result; \
7361 #define vmlsl_lane_u16(a, b, c, d) \
7362 __extension__ \
7363 ({ \
7364 uint16x4_t c_ = (c); \
7365 uint16x4_t b_ = (b); \
7366 uint32x4_t a_ = (a); \
7367 uint32x4_t result; \
7368 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7369 : "=w"(result) \
7370 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7371 : /* No clobbers */); \
7372 result; \
7375 #define vmlsl_lane_u32(a, b, c, d) \
7376 __extension__ \
7377 ({ \
7378 uint32x2_t c_ = (c); \
7379 uint32x2_t b_ = (b); \
7380 uint64x2_t a_ = (a); \
7381 uint64x2_t result; \
7382 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7383 : "=w"(result) \
7384 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7385 : /* No clobbers */); \
7386 result; \
7389 #define vmlsl_laneq_s16(a, b, c, d) \
7390 __extension__ \
7391 ({ \
7392 int16x8_t c_ = (c); \
7393 int16x4_t b_ = (b); \
7394 int32x4_t a_ = (a); \
7395 int32x4_t result; \
7396 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
7397 : "=w"(result) \
7398 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7399 : /* No clobbers */); \
7400 result; \
7403 #define vmlsl_laneq_s32(a, b, c, d) \
7404 __extension__ \
7405 ({ \
7406 int32x4_t c_ = (c); \
7407 int32x2_t b_ = (b); \
7408 int64x2_t a_ = (a); \
7409 int64x2_t result; \
7410 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
7411 : "=w"(result) \
7412 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7413 : /* No clobbers */); \
7414 result; \
7417 #define vmlsl_laneq_u16(a, b, c, d) \
7418 __extension__ \
7419 ({ \
7420 uint16x8_t c_ = (c); \
7421 uint16x4_t b_ = (b); \
7422 uint32x4_t a_ = (a); \
7423 uint32x4_t result; \
7424 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
7425 : "=w"(result) \
7426 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
7427 : /* No clobbers */); \
7428 result; \
7431 #define vmlsl_laneq_u32(a, b, c, d) \
7432 __extension__ \
7433 ({ \
7434 uint32x4_t c_ = (c); \
7435 uint32x2_t b_ = (b); \
7436 uint64x2_t a_ = (a); \
7437 uint64x2_t result; \
7438 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
7439 : "=w"(result) \
7440 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7441 : /* No clobbers */); \
7442 result; \
/* vmlsl_n / vmlsl: widening multiply-subtract long on 64-bit sources
   (SMLSL/UMLSL).  The _n forms broadcast a scalar via element 0; the full
   vector forms multiply element-wise.  Accumulator input is tied to the
   output register with "0".  */
7445 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7446 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7448 int32x4_t result;
7449 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7450 : "=w"(result)
7451 : "0"(a), "w"(b), "x"(c)
7452 : /* No clobbers */);
7453 return result;
7456 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7457 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7459 int64x2_t result;
7460 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7461 : "=w"(result)
7462 : "0"(a), "w"(b), "w"(c)
7463 : /* No clobbers */);
7464 return result;
7467 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7468 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7470 uint32x4_t result;
7471 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7472 : "=w"(result)
7473 : "0"(a), "w"(b), "x"(c)
7474 : /* No clobbers */);
7475 return result;
7478 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7479 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7481 uint64x2_t result;
7482 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7483 : "=w"(result)
7484 : "0"(a), "w"(b), "w"(c)
7485 : /* No clobbers */);
7486 return result;
7489 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7490 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7492 int16x8_t result;
7493 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7494 : "=w"(result)
7495 : "0"(a), "w"(b), "w"(c)
7496 : /* No clobbers */);
7497 return result;
7500 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7501 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7503 int32x4_t result;
7504 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7505 : "=w"(result)
7506 : "0"(a), "w"(b), "w"(c)
7507 : /* No clobbers */);
7508 return result;
7511 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7512 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7514 int64x2_t result;
7515 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7516 : "=w"(result)
7517 : "0"(a), "w"(b), "w"(c)
7518 : /* No clobbers */);
7519 return result;
7522 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7523 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7525 uint16x8_t result;
7526 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7527 : "=w"(result)
7528 : "0"(a), "w"(b), "w"(c)
7529 : /* No clobbers */);
7530 return result;
7533 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7534 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7536 uint32x4_t result;
7537 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7538 : "=w"(result)
7539 : "0"(a), "w"(b), "w"(c)
7540 : /* No clobbers */);
7541 return result;
7544 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7545 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7547 uint64x2_t result;
7548 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7549 : "=w"(result)
7550 : "0"(a), "w"(b), "w"(c)
7551 : /* No clobbers */);
7552 return result;
/* vmlsq_n / vmlsq: non-widening 128-bit multiply-subtract.
   Integer forms map to MLS (by-element for _n, vector otherwise).
   vmlsq_n_f32 has no single FP MLS-by-element instruction here, so it is
   emitted as a two-instruction FMUL + FSUB sequence through a scratch
   output register t1.  */
7555 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7556 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7558 float32x4_t result;
7559 float32x4_t t1;
7560 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7561 : "=w"(result), "=w"(t1)
7562 : "0"(a), "w"(b), "w"(c)
7563 : /* No clobbers */);
7564 return result;
7567 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7568 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7570 int16x8_t result;
7571 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7572 : "=w"(result)
7573 : "0"(a), "w"(b), "x"(c)
7574 : /* No clobbers */);
7575 return result;
7578 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7579 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7581 int32x4_t result;
7582 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7583 : "=w"(result)
7584 : "0"(a), "w"(b), "w"(c)
7585 : /* No clobbers */);
7586 return result;
7589 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7590 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7592 uint16x8_t result;
7593 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7594 : "=w"(result)
7595 : "0"(a), "w"(b), "x"(c)
7596 : /* No clobbers */);
7597 return result;
7600 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7601 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7603 uint32x4_t result;
7604 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7605 : "=w"(result)
7606 : "0"(a), "w"(b), "w"(c)
7607 : /* No clobbers */);
7608 return result;
7611 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7612 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7614 int8x16_t result;
7615 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7616 : "=w"(result)
7617 : "0"(a), "w"(b), "w"(c)
7618 : /* No clobbers */);
7619 return result;
7622 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7623 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7625 int16x8_t result;
7626 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7627 : "=w"(result)
7628 : "0"(a), "w"(b), "w"(c)
7629 : /* No clobbers */);
7630 return result;
7633 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7634 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7636 int32x4_t result;
7637 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7638 : "=w"(result)
7639 : "0"(a), "w"(b), "w"(c)
7640 : /* No clobbers */);
7641 return result;
7644 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7645 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7647 uint8x16_t result;
7648 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7649 : "=w"(result)
7650 : "0"(a), "w"(b), "w"(c)
7651 : /* No clobbers */);
7652 return result;
7655 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7656 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7658 uint16x8_t result;
7659 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7660 : "=w"(result)
7661 : "0"(a), "w"(b), "w"(c)
7662 : /* No clobbers */);
7663 return result;
7666 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7667 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7669 uint32x4_t result;
7670 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7671 : "=w"(result)
7672 : "0"(a), "w"(b), "w"(c)
7673 : /* No clobbers */);
7674 return result;
/* vmovl_high / vmovl: sign/zero-extending widen (move long).
   Implemented as a shift-left-long by #0 — SSHLL2/USHLL2 widen the high
   half of a 128-bit source, SSHLL/USHLL widen a 64-bit source.  */
7677 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7678 vmovl_high_s8 (int8x16_t a)
7680 int16x8_t result;
7681 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7682 : "=w"(result)
7683 : "w"(a)
7684 : /* No clobbers */);
7685 return result;
7688 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7689 vmovl_high_s16 (int16x8_t a)
7691 int32x4_t result;
7692 __asm__ ("sshll2 %0.4s,%1.8h,#0"
7693 : "=w"(result)
7694 : "w"(a)
7695 : /* No clobbers */);
7696 return result;
7699 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7700 vmovl_high_s32 (int32x4_t a)
7702 int64x2_t result;
7703 __asm__ ("sshll2 %0.2d,%1.4s,#0"
7704 : "=w"(result)
7705 : "w"(a)
7706 : /* No clobbers */);
7707 return result;
7710 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7711 vmovl_high_u8 (uint8x16_t a)
7713 uint16x8_t result;
7714 __asm__ ("ushll2 %0.8h,%1.16b,#0"
7715 : "=w"(result)
7716 : "w"(a)
7717 : /* No clobbers */);
7718 return result;
7721 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7722 vmovl_high_u16 (uint16x8_t a)
7724 uint32x4_t result;
7725 __asm__ ("ushll2 %0.4s,%1.8h,#0"
7726 : "=w"(result)
7727 : "w"(a)
7728 : /* No clobbers */);
7729 return result;
7732 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7733 vmovl_high_u32 (uint32x4_t a)
7735 uint64x2_t result;
7736 __asm__ ("ushll2 %0.2d,%1.4s,#0"
7737 : "=w"(result)
7738 : "w"(a)
7739 : /* No clobbers */);
7740 return result;
7743 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7744 vmovl_s8 (int8x8_t a)
7746 int16x8_t result;
7747 __asm__ ("sshll %0.8h,%1.8b,#0"
7748 : "=w"(result)
7749 : "w"(a)
7750 : /* No clobbers */);
7751 return result;
7754 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7755 vmovl_s16 (int16x4_t a)
7757 int32x4_t result;
7758 __asm__ ("sshll %0.4s,%1.4h,#0"
7759 : "=w"(result)
7760 : "w"(a)
7761 : /* No clobbers */);
7762 return result;
7765 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7766 vmovl_s32 (int32x2_t a)
7768 int64x2_t result;
7769 __asm__ ("sshll %0.2d,%1.2s,#0"
7770 : "=w"(result)
7771 : "w"(a)
7772 : /* No clobbers */);
7773 return result;
7776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7777 vmovl_u8 (uint8x8_t a)
7779 uint16x8_t result;
7780 __asm__ ("ushll %0.8h,%1.8b,#0"
7781 : "=w"(result)
7782 : "w"(a)
7783 : /* No clobbers */);
7784 return result;
7787 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7788 vmovl_u16 (uint16x4_t a)
7790 uint32x4_t result;
7791 __asm__ ("ushll %0.4s,%1.4h,#0"
7792 : "=w"(result)
7793 : "w"(a)
7794 : /* No clobbers */);
7795 return result;
7798 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7799 vmovl_u32 (uint32x2_t a)
7801 uint64x2_t result;
7802 __asm__ ("ushll %0.2d,%1.2s,#0"
7803 : "=w"(result)
7804 : "w"(a)
7805 : /* No clobbers */);
7806 return result;
/* vmovn_high / vmovn: narrowing extract (XTN/XTN2), keeping the low half
   of each element.  The _high forms first build the result with vcombine
   (low half = a, high half zeroed via vcreate(0)), then XTN2 writes the
   narrowed b into the upper half in place ("+w" read-write operand).  */
7809 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7810 vmovn_high_s16 (int8x8_t a, int16x8_t b)
7812 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
7813 __asm__ ("xtn2 %0.16b,%1.8h"
7814 : "+w"(result)
7815 : "w"(b)
7816 : /* No clobbers */);
7817 return result;
7820 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7821 vmovn_high_s32 (int16x4_t a, int32x4_t b)
7823 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
7824 __asm__ ("xtn2 %0.8h,%1.4s"
7825 : "+w"(result)
7826 : "w"(b)
7827 : /* No clobbers */);
7828 return result;
7831 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7832 vmovn_high_s64 (int32x2_t a, int64x2_t b)
7834 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
7835 __asm__ ("xtn2 %0.4s,%1.2d"
7836 : "+w"(result)
7837 : "w"(b)
7838 : /* No clobbers */);
7839 return result;
7842 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7843 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
7845 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
7846 __asm__ ("xtn2 %0.16b,%1.8h"
7847 : "+w"(result)
7848 : "w"(b)
7849 : /* No clobbers */);
7850 return result;
7853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7854 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
7856 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
7857 __asm__ ("xtn2 %0.8h,%1.4s"
7858 : "+w"(result)
7859 : "w"(b)
7860 : /* No clobbers */);
7861 return result;
7864 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7865 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
7867 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
7868 __asm__ ("xtn2 %0.4s,%1.2d"
7869 : "+w"(result)
7870 : "w"(b)
7871 : /* No clobbers */);
7872 return result;
7875 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7876 vmovn_s16 (int16x8_t a)
7878 int8x8_t result;
7879 __asm__ ("xtn %0.8b,%1.8h"
7880 : "=w"(result)
7881 : "w"(a)
7882 : /* No clobbers */);
7883 return result;
7886 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7887 vmovn_s32 (int32x4_t a)
7889 int16x4_t result;
7890 __asm__ ("xtn %0.4h,%1.4s"
7891 : "=w"(result)
7892 : "w"(a)
7893 : /* No clobbers */);
7894 return result;
7897 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7898 vmovn_s64 (int64x2_t a)
7900 int32x2_t result;
7901 __asm__ ("xtn %0.2s,%1.2d"
7902 : "=w"(result)
7903 : "w"(a)
7904 : /* No clobbers */);
7905 return result;
7908 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7909 vmovn_u16 (uint16x8_t a)
7911 uint8x8_t result;
7912 __asm__ ("xtn %0.8b,%1.8h"
7913 : "=w"(result)
7914 : "w"(a)
7915 : /* No clobbers */);
7916 return result;
7919 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7920 vmovn_u32 (uint32x4_t a)
7922 uint16x4_t result;
7923 __asm__ ("xtn %0.4h,%1.4s"
7924 : "=w"(result)
7925 : "w"(a)
7926 : /* No clobbers */);
7927 return result;
7930 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7931 vmovn_u64 (uint64x2_t a)
7933 uint32x2_t result;
7934 __asm__ ("xtn %0.2s,%1.2d"
7935 : "=w"(result)
7936 : "w"(a)
7937 : /* No clobbers */);
7938 return result;
/* vmul_n: 64-bit multiply by a scalar broadcast from element 0
   (FMUL/MUL by-element).  16-bit variants use "x" — MUL by .h element
   can only index V0-V15.  */
7941 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7942 vmul_n_f32 (float32x2_t a, float32_t b)
7944 float32x2_t result;
7945 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
7946 : "=w"(result)
7947 : "w"(a), "w"(b)
7948 : /* No clobbers */);
7949 return result;
7952 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7953 vmul_n_s16 (int16x4_t a, int16_t b)
7955 int16x4_t result;
7956 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
7957 : "=w"(result)
7958 : "w"(a), "x"(b)
7959 : /* No clobbers */);
7960 return result;
7963 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7964 vmul_n_s32 (int32x2_t a, int32_t b)
7966 int32x2_t result;
7967 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
7968 : "=w"(result)
7969 : "w"(a), "w"(b)
7970 : /* No clobbers */);
7971 return result;
7974 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7975 vmul_n_u16 (uint16x4_t a, uint16_t b)
7977 uint16x4_t result;
7978 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
7979 : "=w"(result)
7980 : "w"(a), "x"(b)
7981 : /* No clobbers */);
7982 return result;
7985 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7986 vmul_n_u32 (uint32x2_t a, uint32_t b)
7988 uint32x2_t result;
7989 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
7990 : "=w"(result)
7991 : "w"(a), "w"(b)
7992 : /* No clobbers */);
7993 return result;
/* vmull_high_lane / vmull_high_laneq / vmull_high_n: widening multiply
   long of the high halves by a selected lane or broadcast scalar
   (SMULL2/UMULL2 by-element).  Lane forms are macros so the index stays
   an "i" immediate; 16-bit lane/scalar operands use "x" (V0-V15 only for
   .h by-element encodings).  */
7996 #define vmull_high_lane_s16(a, b, c) \
7997 __extension__ \
7998 ({ \
7999 int16x4_t b_ = (b); \
8000 int16x8_t a_ = (a); \
8001 int32x4_t result; \
8002 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8003 : "=w"(result) \
8004 : "w"(a_), "x"(b_), "i"(c) \
8005 : /* No clobbers */); \
8006 result; \
8009 #define vmull_high_lane_s32(a, b, c) \
8010 __extension__ \
8011 ({ \
8012 int32x2_t b_ = (b); \
8013 int32x4_t a_ = (a); \
8014 int64x2_t result; \
8015 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8016 : "=w"(result) \
8017 : "w"(a_), "w"(b_), "i"(c) \
8018 : /* No clobbers */); \
8019 result; \
8022 #define vmull_high_lane_u16(a, b, c) \
8023 __extension__ \
8024 ({ \
8025 uint16x4_t b_ = (b); \
8026 uint16x8_t a_ = (a); \
8027 uint32x4_t result; \
8028 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8029 : "=w"(result) \
8030 : "w"(a_), "x"(b_), "i"(c) \
8031 : /* No clobbers */); \
8032 result; \
8035 #define vmull_high_lane_u32(a, b, c) \
8036 __extension__ \
8037 ({ \
8038 uint32x2_t b_ = (b); \
8039 uint32x4_t a_ = (a); \
8040 uint64x2_t result; \
8041 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8042 : "=w"(result) \
8043 : "w"(a_), "w"(b_), "i"(c) \
8044 : /* No clobbers */); \
8045 result; \
8048 #define vmull_high_laneq_s16(a, b, c) \
8049 __extension__ \
8050 ({ \
8051 int16x8_t b_ = (b); \
8052 int16x8_t a_ = (a); \
8053 int32x4_t result; \
8054 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
8055 : "=w"(result) \
8056 : "w"(a_), "x"(b_), "i"(c) \
8057 : /* No clobbers */); \
8058 result; \
8061 #define vmull_high_laneq_s32(a, b, c) \
8062 __extension__ \
8063 ({ \
8064 int32x4_t b_ = (b); \
8065 int32x4_t a_ = (a); \
8066 int64x2_t result; \
8067 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
8068 : "=w"(result) \
8069 : "w"(a_), "w"(b_), "i"(c) \
8070 : /* No clobbers */); \
8071 result; \
8074 #define vmull_high_laneq_u16(a, b, c) \
8075 __extension__ \
8076 ({ \
8077 uint16x8_t b_ = (b); \
8078 uint16x8_t a_ = (a); \
8079 uint32x4_t result; \
8080 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
8081 : "=w"(result) \
8082 : "w"(a_), "x"(b_), "i"(c) \
8083 : /* No clobbers */); \
8084 result; \
8087 #define vmull_high_laneq_u32(a, b, c) \
8088 __extension__ \
8089 ({ \
8090 uint32x4_t b_ = (b); \
8091 uint32x4_t a_ = (a); \
8092 uint64x2_t result; \
8093 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
8094 : "=w"(result) \
8095 : "w"(a_), "w"(b_), "i"(c) \
8096 : /* No clobbers */); \
8097 result; \
8100 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8101 vmull_high_n_s16 (int16x8_t a, int16_t b)
8103 int32x4_t result;
8104 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8105 : "=w"(result)
8106 : "w"(a), "x"(b)
8107 : /* No clobbers */);
8108 return result;
8111 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8112 vmull_high_n_s32 (int32x4_t a, int32_t b)
8114 int64x2_t result;
8115 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8116 : "=w"(result)
8117 : "w"(a), "w"(b)
8118 : /* No clobbers */);
8119 return result;
8122 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8123 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8125 uint32x4_t result;
8126 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8127 : "=w"(result)
8128 : "w"(a), "x"(b)
8129 : /* No clobbers */);
8130 return result;
8133 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8134 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8136 uint64x2_t result;
8137 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8138 : "=w"(result)
8139 : "w"(a), "w"(b)
8140 : /* No clobbers */);
8141 return result;
/* vmull_high: full-vector widening multiply long of the high halves
   (PMULL2 for polynomial, SMULL2/UMULL2 for signed/unsigned).  */
8144 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8145 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8147 poly16x8_t result;
8148 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8149 : "=w"(result)
8150 : "w"(a), "w"(b)
8151 : /* No clobbers */);
8152 return result;
8155 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8156 vmull_high_s8 (int8x16_t a, int8x16_t b)
8158 int16x8_t result;
8159 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8160 : "=w"(result)
8161 : "w"(a), "w"(b)
8162 : /* No clobbers */);
8163 return result;
8166 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8167 vmull_high_s16 (int16x8_t a, int16x8_t b)
8169 int32x4_t result;
8170 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8171 : "=w"(result)
8172 : "w"(a), "w"(b)
8173 : /* No clobbers */);
8174 return result;
8177 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8178 vmull_high_s32 (int32x4_t a, int32x4_t b)
8180 int64x2_t result;
8181 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8182 : "=w"(result)
8183 : "w"(a), "w"(b)
8184 : /* No clobbers */);
8185 return result;
8188 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8189 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8191 uint16x8_t result;
8192 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8193 : "=w"(result)
8194 : "w"(a), "w"(b)
8195 : /* No clobbers */);
8196 return result;
8199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8200 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8202 uint32x4_t result;
8203 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8204 : "=w"(result)
8205 : "w"(a), "w"(b)
8206 : /* No clobbers */);
8207 return result;
8210 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8211 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8213 uint64x2_t result;
8214 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8215 : "=w"(result)
8216 : "w"(a), "w"(b)
8217 : /* No clobbers */);
8218 return result;
/* vmull_lane / vmull_laneq / vmull_n: widening multiply long of a 64-bit
   vector by a selected lane (laneq: lane from a 128-bit vector) or a
   broadcast scalar (SMULL/UMULL by-element).  Lane forms are macros so the
   index is an "i" immediate; 16-bit lane/scalar operands use "x"
   (by-element .h encodings address only V0-V15).  */
8221 #define vmull_lane_s16(a, b, c) \
8222 __extension__ \
8223 ({ \
8224 int16x4_t b_ = (b); \
8225 int16x4_t a_ = (a); \
8226 int32x4_t result; \
8227 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
8228 : "=w"(result) \
8229 : "w"(a_), "x"(b_), "i"(c) \
8230 : /* No clobbers */); \
8231 result; \
8234 #define vmull_lane_s32(a, b, c) \
8235 __extension__ \
8236 ({ \
8237 int32x2_t b_ = (b); \
8238 int32x2_t a_ = (a); \
8239 int64x2_t result; \
8240 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
8241 : "=w"(result) \
8242 : "w"(a_), "w"(b_), "i"(c) \
8243 : /* No clobbers */); \
8244 result; \
8247 #define vmull_lane_u16(a, b, c) \
8248 __extension__ \
8249 ({ \
8250 uint16x4_t b_ = (b); \
8251 uint16x4_t a_ = (a); \
8252 uint32x4_t result; \
8253 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
8254 : "=w"(result) \
8255 : "w"(a_), "x"(b_), "i"(c) \
8256 : /* No clobbers */); \
8257 result; \
8260 #define vmull_lane_u32(a, b, c) \
8261 __extension__ \
8262 ({ \
8263 uint32x2_t b_ = (b); \
8264 uint32x2_t a_ = (a); \
8265 uint64x2_t result; \
8266 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8267 : "=w"(result) \
8268 : "w"(a_), "w"(b_), "i"(c) \
8269 : /* No clobbers */); \
8270 result; \
8273 #define vmull_laneq_s16(a, b, c) \
8274 __extension__ \
8275 ({ \
8276 int16x8_t b_ = (b); \
8277 int16x4_t a_ = (a); \
8278 int32x4_t result; \
8279 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
8280 : "=w"(result) \
8281 : "w"(a_), "x"(b_), "i"(c) \
8282 : /* No clobbers */); \
8283 result; \
8286 #define vmull_laneq_s32(a, b, c) \
8287 __extension__ \
8288 ({ \
8289 int32x4_t b_ = (b); \
8290 int32x2_t a_ = (a); \
8291 int64x2_t result; \
8292 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
8293 : "=w"(result) \
8294 : "w"(a_), "w"(b_), "i"(c) \
8295 : /* No clobbers */); \
8296 result; \
8299 #define vmull_laneq_u16(a, b, c) \
8300 __extension__ \
8301 ({ \
8302 uint16x8_t b_ = (b); \
8303 uint16x4_t a_ = (a); \
8304 uint32x4_t result; \
8305 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
8306 : "=w"(result) \
8307 : "w"(a_), "x"(b_), "i"(c) \
8308 : /* No clobbers */); \
8309 result; \
8312 #define vmull_laneq_u32(a, b, c) \
8313 __extension__ \
8314 ({ \
8315 uint32x4_t b_ = (b); \
8316 uint32x2_t a_ = (a); \
8317 uint64x2_t result; \
8318 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
8319 : "=w"(result) \
8320 : "w"(a_), "w"(b_), "i"(c) \
8321 : /* No clobbers */); \
8322 result; \
8325 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8326 vmull_n_s16 (int16x4_t a, int16_t b)
8328 int32x4_t result;
8329 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8330 : "=w"(result)
8331 : "w"(a), "x"(b)
8332 : /* No clobbers */);
8333 return result;
8336 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8337 vmull_n_s32 (int32x2_t a, int32_t b)
8339 int64x2_t result;
8340 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8341 : "=w"(result)
8342 : "w"(a), "w"(b)
8343 : /* No clobbers */);
8344 return result;
8347 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8348 vmull_n_u16 (uint16x4_t a, uint16_t b)
8350 uint32x4_t result;
8351 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8352 : "=w"(result)
8353 : "w"(a), "x"(b)
8354 : /* No clobbers */);
8355 return result;
8358 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8359 vmull_n_u32 (uint32x2_t a, uint32_t b)
8361 uint64x2_t result;
8362 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8363 : "=w"(result)
8364 : "w"(a), "w"(b)
8365 : /* No clobbers */);
8366 return result;
/* vmull: full-vector widening multiply long of 64-bit sources
   (PMULL for polynomial, SMULL/UMULL for signed/unsigned).  */
8369 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8370 vmull_p8 (poly8x8_t a, poly8x8_t b)
8372 poly16x8_t result;
8373 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8374 : "=w"(result)
8375 : "w"(a), "w"(b)
8376 : /* No clobbers */);
8377 return result;
8380 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8381 vmull_s8 (int8x8_t a, int8x8_t b)
8383 int16x8_t result;
8384 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8385 : "=w"(result)
8386 : "w"(a), "w"(b)
8387 : /* No clobbers */);
8388 return result;
8391 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8392 vmull_s16 (int16x4_t a, int16x4_t b)
8394 int32x4_t result;
8395 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8396 : "=w"(result)
8397 : "w"(a), "w"(b)
8398 : /* No clobbers */);
8399 return result;
8402 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8403 vmull_s32 (int32x2_t a, int32x2_t b)
8405 int64x2_t result;
8406 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8407 : "=w"(result)
8408 : "w"(a), "w"(b)
8409 : /* No clobbers */);
8410 return result;
8413 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8414 vmull_u8 (uint8x8_t a, uint8x8_t b)
8416 uint16x8_t result;
8417 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8418 : "=w"(result)
8419 : "w"(a), "w"(b)
8420 : /* No clobbers */);
8421 return result;
8424 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8425 vmull_u16 (uint16x4_t a, uint16x4_t b)
8427 uint32x4_t result;
8428 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8429 : "=w"(result)
8430 : "w"(a), "w"(b)
8431 : /* No clobbers */);
8432 return result;
8435 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8436 vmull_u32 (uint32x2_t a, uint32x2_t b)
8438 uint64x2_t result;
8439 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8440 : "=w"(result)
8441 : "w"(a), "w"(b)
8442 : /* No clobbers */);
8443 return result;
/* vmulq_n: 128-bit multiply by a scalar broadcast from element 0
   (FMUL/MUL by-element).  16-bit variants use "x" — MUL by .h element
   can only index V0-V15.  */
8446 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8447 vmulq_n_f32 (float32x4_t a, float32_t b)
8449 float32x4_t result;
8450 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8451 : "=w"(result)
8452 : "w"(a), "w"(b)
8453 : /* No clobbers */);
8454 return result;
8457 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8458 vmulq_n_f64 (float64x2_t a, float64_t b)
8460 float64x2_t result;
8461 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8462 : "=w"(result)
8463 : "w"(a), "w"(b)
8464 : /* No clobbers */);
8465 return result;
8468 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8469 vmulq_n_s16 (int16x8_t a, int16_t b)
8471 int16x8_t result;
8472 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8473 : "=w"(result)
8474 : "w"(a), "x"(b)
8475 : /* No clobbers */);
8476 return result;
8479 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8480 vmulq_n_s32 (int32x4_t a, int32_t b)
8482 int32x4_t result;
8483 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8484 : "=w"(result)
8485 : "w"(a), "w"(b)
8486 : /* No clobbers */);
8487 return result;
8490 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8491 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8493 uint16x8_t result;
8494 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8495 : "=w"(result)
8496 : "w"(a), "x"(b)
8497 : /* No clobbers */);
8498 return result;
8501 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8502 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8504 uint32x4_t result;
8505 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8506 : "=w"(result)
8507 : "w"(a), "w"(b)
8508 : /* No clobbers */);
8509 return result;
8512 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8513 vmvn_p8 (poly8x8_t a)
8515 poly8x8_t result;
8516 __asm__ ("mvn %0.8b,%1.8b"
8517 : "=w"(result)
8518 : "w"(a)
8519 : /* No clobbers */);
8520 return result;
8523 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8524 vmvn_s8 (int8x8_t a)
8526 int8x8_t result;
8527 __asm__ ("mvn %0.8b,%1.8b"
8528 : "=w"(result)
8529 : "w"(a)
8530 : /* No clobbers */);
8531 return result;
8534 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8535 vmvn_s16 (int16x4_t a)
8537 int16x4_t result;
8538 __asm__ ("mvn %0.8b,%1.8b"
8539 : "=w"(result)
8540 : "w"(a)
8541 : /* No clobbers */);
8542 return result;
8545 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8546 vmvn_s32 (int32x2_t a)
8548 int32x2_t result;
8549 __asm__ ("mvn %0.8b,%1.8b"
8550 : "=w"(result)
8551 : "w"(a)
8552 : /* No clobbers */);
8553 return result;
8556 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8557 vmvn_u8 (uint8x8_t a)
8559 uint8x8_t result;
8560 __asm__ ("mvn %0.8b,%1.8b"
8561 : "=w"(result)
8562 : "w"(a)
8563 : /* No clobbers */);
8564 return result;
8567 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8568 vmvn_u16 (uint16x4_t a)
8570 uint16x4_t result;
8571 __asm__ ("mvn %0.8b,%1.8b"
8572 : "=w"(result)
8573 : "w"(a)
8574 : /* No clobbers */);
8575 return result;
8578 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8579 vmvn_u32 (uint32x2_t a)
8581 uint32x2_t result;
8582 __asm__ ("mvn %0.8b,%1.8b"
8583 : "=w"(result)
8584 : "w"(a)
8585 : /* No clobbers */);
8586 return result;
8589 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8590 vmvnq_p8 (poly8x16_t a)
8592 poly8x16_t result;
8593 __asm__ ("mvn %0.16b,%1.16b"
8594 : "=w"(result)
8595 : "w"(a)
8596 : /* No clobbers */);
8597 return result;
8600 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8601 vmvnq_s8 (int8x16_t a)
8603 int8x16_t result;
8604 __asm__ ("mvn %0.16b,%1.16b"
8605 : "=w"(result)
8606 : "w"(a)
8607 : /* No clobbers */);
8608 return result;
8611 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8612 vmvnq_s16 (int16x8_t a)
8614 int16x8_t result;
8615 __asm__ ("mvn %0.16b,%1.16b"
8616 : "=w"(result)
8617 : "w"(a)
8618 : /* No clobbers */);
8619 return result;
8622 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8623 vmvnq_s32 (int32x4_t a)
8625 int32x4_t result;
8626 __asm__ ("mvn %0.16b,%1.16b"
8627 : "=w"(result)
8628 : "w"(a)
8629 : /* No clobbers */);
8630 return result;
8633 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8634 vmvnq_u8 (uint8x16_t a)
8636 uint8x16_t result;
8637 __asm__ ("mvn %0.16b,%1.16b"
8638 : "=w"(result)
8639 : "w"(a)
8640 : /* No clobbers */);
8641 return result;
8644 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8645 vmvnq_u16 (uint16x8_t a)
8647 uint16x8_t result;
8648 __asm__ ("mvn %0.16b,%1.16b"
8649 : "=w"(result)
8650 : "w"(a)
8651 : /* No clobbers */);
8652 return result;
8655 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8656 vmvnq_u32 (uint32x4_t a)
8658 uint32x4_t result;
8659 __asm__ ("mvn %0.16b,%1.16b"
8660 : "=w"(result)
8661 : "w"(a)
8662 : /* No clobbers */);
8663 return result;
8667 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8668 vpadal_s8 (int16x4_t a, int8x8_t b)
8670 int16x4_t result;
8671 __asm__ ("sadalp %0.4h,%2.8b"
8672 : "=w"(result)
8673 : "0"(a), "w"(b)
8674 : /* No clobbers */);
8675 return result;
8678 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8679 vpadal_s16 (int32x2_t a, int16x4_t b)
8681 int32x2_t result;
8682 __asm__ ("sadalp %0.2s,%2.4h"
8683 : "=w"(result)
8684 : "0"(a), "w"(b)
8685 : /* No clobbers */);
8686 return result;
8689 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8690 vpadal_s32 (int64x1_t a, int32x2_t b)
8692 int64x1_t result;
8693 __asm__ ("sadalp %0.1d,%2.2s"
8694 : "=w"(result)
8695 : "0"(a), "w"(b)
8696 : /* No clobbers */);
8697 return result;
8700 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8701 vpadal_u8 (uint16x4_t a, uint8x8_t b)
8703 uint16x4_t result;
8704 __asm__ ("uadalp %0.4h,%2.8b"
8705 : "=w"(result)
8706 : "0"(a), "w"(b)
8707 : /* No clobbers */);
8708 return result;
8711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8712 vpadal_u16 (uint32x2_t a, uint16x4_t b)
8714 uint32x2_t result;
8715 __asm__ ("uadalp %0.2s,%2.4h"
8716 : "=w"(result)
8717 : "0"(a), "w"(b)
8718 : /* No clobbers */);
8719 return result;
8722 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8723 vpadal_u32 (uint64x1_t a, uint32x2_t b)
8725 uint64x1_t result;
8726 __asm__ ("uadalp %0.1d,%2.2s"
8727 : "=w"(result)
8728 : "0"(a), "w"(b)
8729 : /* No clobbers */);
8730 return result;
8733 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8734 vpadalq_s8 (int16x8_t a, int8x16_t b)
8736 int16x8_t result;
8737 __asm__ ("sadalp %0.8h,%2.16b"
8738 : "=w"(result)
8739 : "0"(a), "w"(b)
8740 : /* No clobbers */);
8741 return result;
8744 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8745 vpadalq_s16 (int32x4_t a, int16x8_t b)
8747 int32x4_t result;
8748 __asm__ ("sadalp %0.4s,%2.8h"
8749 : "=w"(result)
8750 : "0"(a), "w"(b)
8751 : /* No clobbers */);
8752 return result;
8755 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8756 vpadalq_s32 (int64x2_t a, int32x4_t b)
8758 int64x2_t result;
8759 __asm__ ("sadalp %0.2d,%2.4s"
8760 : "=w"(result)
8761 : "0"(a), "w"(b)
8762 : /* No clobbers */);
8763 return result;
8766 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8767 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
8769 uint16x8_t result;
8770 __asm__ ("uadalp %0.8h,%2.16b"
8771 : "=w"(result)
8772 : "0"(a), "w"(b)
8773 : /* No clobbers */);
8774 return result;
8777 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8778 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
8780 uint32x4_t result;
8781 __asm__ ("uadalp %0.4s,%2.8h"
8782 : "=w"(result)
8783 : "0"(a), "w"(b)
8784 : /* No clobbers */);
8785 return result;
8788 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8789 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
8791 uint64x2_t result;
8792 __asm__ ("uadalp %0.2d,%2.4s"
8793 : "=w"(result)
8794 : "0"(a), "w"(b)
8795 : /* No clobbers */);
8796 return result;
8799 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8800 vpadd_f32 (float32x2_t a, float32x2_t b)
8802 float32x2_t result;
8803 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
8804 : "=w"(result)
8805 : "w"(a), "w"(b)
8806 : /* No clobbers */);
8807 return result;
8810 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8811 vpaddl_s8 (int8x8_t a)
8813 int16x4_t result;
8814 __asm__ ("saddlp %0.4h,%1.8b"
8815 : "=w"(result)
8816 : "w"(a)
8817 : /* No clobbers */);
8818 return result;
8821 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8822 vpaddl_s16 (int16x4_t a)
8824 int32x2_t result;
8825 __asm__ ("saddlp %0.2s,%1.4h"
8826 : "=w"(result)
8827 : "w"(a)
8828 : /* No clobbers */);
8829 return result;
8832 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8833 vpaddl_s32 (int32x2_t a)
8835 int64x1_t result;
8836 __asm__ ("saddlp %0.1d,%1.2s"
8837 : "=w"(result)
8838 : "w"(a)
8839 : /* No clobbers */);
8840 return result;
8843 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8844 vpaddl_u8 (uint8x8_t a)
8846 uint16x4_t result;
8847 __asm__ ("uaddlp %0.4h,%1.8b"
8848 : "=w"(result)
8849 : "w"(a)
8850 : /* No clobbers */);
8851 return result;
8854 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8855 vpaddl_u16 (uint16x4_t a)
8857 uint32x2_t result;
8858 __asm__ ("uaddlp %0.2s,%1.4h"
8859 : "=w"(result)
8860 : "w"(a)
8861 : /* No clobbers */);
8862 return result;
8865 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8866 vpaddl_u32 (uint32x2_t a)
8868 uint64x1_t result;
8869 __asm__ ("uaddlp %0.1d,%1.2s"
8870 : "=w"(result)
8871 : "w"(a)
8872 : /* No clobbers */);
8873 return result;
8876 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8877 vpaddlq_s8 (int8x16_t a)
8879 int16x8_t result;
8880 __asm__ ("saddlp %0.8h,%1.16b"
8881 : "=w"(result)
8882 : "w"(a)
8883 : /* No clobbers */);
8884 return result;
8887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8888 vpaddlq_s16 (int16x8_t a)
8890 int32x4_t result;
8891 __asm__ ("saddlp %0.4s,%1.8h"
8892 : "=w"(result)
8893 : "w"(a)
8894 : /* No clobbers */);
8895 return result;
8898 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8899 vpaddlq_s32 (int32x4_t a)
8901 int64x2_t result;
8902 __asm__ ("saddlp %0.2d,%1.4s"
8903 : "=w"(result)
8904 : "w"(a)
8905 : /* No clobbers */);
8906 return result;
8909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8910 vpaddlq_u8 (uint8x16_t a)
8912 uint16x8_t result;
8913 __asm__ ("uaddlp %0.8h,%1.16b"
8914 : "=w"(result)
8915 : "w"(a)
8916 : /* No clobbers */);
8917 return result;
8920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8921 vpaddlq_u16 (uint16x8_t a)
8923 uint32x4_t result;
8924 __asm__ ("uaddlp %0.4s,%1.8h"
8925 : "=w"(result)
8926 : "w"(a)
8927 : /* No clobbers */);
8928 return result;
8931 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8932 vpaddlq_u32 (uint32x4_t a)
8934 uint64x2_t result;
8935 __asm__ ("uaddlp %0.2d,%1.4s"
8936 : "=w"(result)
8937 : "w"(a)
8938 : /* No clobbers */);
8939 return result;
8942 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8943 vpaddq_f32 (float32x4_t a, float32x4_t b)
8945 float32x4_t result;
8946 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
8947 : "=w"(result)
8948 : "w"(a), "w"(b)
8949 : /* No clobbers */);
8950 return result;
8953 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8954 vpaddq_f64 (float64x2_t a, float64x2_t b)
8956 float64x2_t result;
8957 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
8958 : "=w"(result)
8959 : "w"(a), "w"(b)
8960 : /* No clobbers */);
8961 return result;
8964 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8965 vpaddq_s8 (int8x16_t a, int8x16_t b)
8967 int8x16_t result;
8968 __asm__ ("addp %0.16b,%1.16b,%2.16b"
8969 : "=w"(result)
8970 : "w"(a), "w"(b)
8971 : /* No clobbers */);
8972 return result;
8975 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8976 vpaddq_s16 (int16x8_t a, int16x8_t b)
8978 int16x8_t result;
8979 __asm__ ("addp %0.8h,%1.8h,%2.8h"
8980 : "=w"(result)
8981 : "w"(a), "w"(b)
8982 : /* No clobbers */);
8983 return result;
8986 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8987 vpaddq_s32 (int32x4_t a, int32x4_t b)
8989 int32x4_t result;
8990 __asm__ ("addp %0.4s,%1.4s,%2.4s"
8991 : "=w"(result)
8992 : "w"(a), "w"(b)
8993 : /* No clobbers */);
8994 return result;
8997 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8998 vpaddq_s64 (int64x2_t a, int64x2_t b)
9000 int64x2_t result;
9001 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9002 : "=w"(result)
9003 : "w"(a), "w"(b)
9004 : /* No clobbers */);
9005 return result;
9008 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9009 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9011 uint8x16_t result;
9012 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9013 : "=w"(result)
9014 : "w"(a), "w"(b)
9015 : /* No clobbers */);
9016 return result;
9019 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9020 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9022 uint16x8_t result;
9023 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9024 : "=w"(result)
9025 : "w"(a), "w"(b)
9026 : /* No clobbers */);
9027 return result;
9030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9031 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9033 uint32x4_t result;
9034 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9035 : "=w"(result)
9036 : "w"(a), "w"(b)
9037 : /* No clobbers */);
9038 return result;
9041 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9042 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9044 uint64x2_t result;
9045 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9046 : "=w"(result)
9047 : "w"(a), "w"(b)
9048 : /* No clobbers */);
9049 return result;
9052 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9053 vpadds_f32 (float32x2_t a)
9055 float32_t result;
9056 __asm__ ("faddp %s0,%1.2s"
9057 : "=w"(result)
9058 : "w"(a)
9059 : /* No clobbers */);
9060 return result;
9063 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9064 vqdmulh_n_s16 (int16x4_t a, int16_t b)
9066 int16x4_t result;
9067 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
9068 : "=w"(result)
9069 : "w"(a), "x"(b)
9070 : /* No clobbers */);
9071 return result;
9074 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9075 vqdmulh_n_s32 (int32x2_t a, int32_t b)
9077 int32x2_t result;
9078 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
9079 : "=w"(result)
9080 : "w"(a), "w"(b)
9081 : /* No clobbers */);
9082 return result;
9085 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9086 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
9088 int16x8_t result;
9089 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
9090 : "=w"(result)
9091 : "w"(a), "x"(b)
9092 : /* No clobbers */);
9093 return result;
9096 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9097 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
9099 int32x4_t result;
9100 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
9101 : "=w"(result)
9102 : "w"(a), "w"(b)
9103 : /* No clobbers */);
9104 return result;
9107 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9108 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
9110 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
9111 __asm__ ("sqxtn2 %0.16b, %1.8h"
9112 : "+w"(result)
9113 : "w"(b)
9114 : /* No clobbers */);
9115 return result;
9118 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9119 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
9121 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
9122 __asm__ ("sqxtn2 %0.8h, %1.4s"
9123 : "+w"(result)
9124 : "w"(b)
9125 : /* No clobbers */);
9126 return result;
9129 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9130 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
9132 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
9133 __asm__ ("sqxtn2 %0.4s, %1.2d"
9134 : "+w"(result)
9135 : "w"(b)
9136 : /* No clobbers */);
9137 return result;
9140 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9141 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
9143 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9144 __asm__ ("uqxtn2 %0.16b, %1.8h"
9145 : "+w"(result)
9146 : "w"(b)
9147 : /* No clobbers */);
9148 return result;
9151 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9152 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
9154 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
9155 __asm__ ("uqxtn2 %0.8h, %1.4s"
9156 : "+w"(result)
9157 : "w"(b)
9158 : /* No clobbers */);
9159 return result;
9162 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9163 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
9165 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
9166 __asm__ ("uqxtn2 %0.4s, %1.2d"
9167 : "+w"(result)
9168 : "w"(b)
9169 : /* No clobbers */);
9170 return result;
9173 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9174 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
9176 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9177 __asm__ ("sqxtun2 %0.16b, %1.8h"
9178 : "+w"(result)
9179 : "w"(b)
9180 : /* No clobbers */);
9181 return result;
9184 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9185 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
9187 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
9188 __asm__ ("sqxtun2 %0.8h, %1.4s"
9189 : "+w"(result)
9190 : "w"(b)
9191 : /* No clobbers */);
9192 return result;
9195 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9196 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
9198 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
9199 __asm__ ("sqxtun2 %0.4s, %1.2d"
9200 : "+w"(result)
9201 : "w"(b)
9202 : /* No clobbers */);
9203 return result;
9206 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9207 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
9209 int16x4_t result;
9210 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
9211 : "=w"(result)
9212 : "w"(a), "x"(b)
9213 : /* No clobbers */);
9214 return result;
9217 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9218 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
9220 int32x2_t result;
9221 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
9222 : "=w"(result)
9223 : "w"(a), "w"(b)
9224 : /* No clobbers */);
9225 return result;
9228 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9229 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
9231 int16x8_t result;
9232 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
9233 : "=w"(result)
9234 : "w"(a), "x"(b)
9235 : /* No clobbers */);
9236 return result;
9239 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9240 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
9242 int32x4_t result;
9243 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
9244 : "=w"(result)
9245 : "w"(a), "w"(b)
9246 : /* No clobbers */);
9247 return result;
/* Saturating rounding shift-right-narrow into the high half
   (SQRSHRN2/UQRSHRN2).  Macros rather than functions because the shift
   amount C must be an immediate ("i" constraint).  A supplies the low half
   of the result; B is shifted right by C, rounded, saturated and narrowed
   into the high half.  */

/* vqrshrn_high_n_s16: 8x s16 >> C -> high 8x s8 lanes.  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_s32: 4x s32 >> C -> high 4x s16 lanes.  */
#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_s64: 2x s64 >> C -> high 2x s32 lanes.  */
#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u16: 8x u16 >> C -> high 8x u8 lanes.  */
#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u32: 4x u32 >> C -> high 4x u16 lanes.  */
#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrn_high_n_u64: 2x u64 >> C -> high 2x u32 lanes.  */
#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating rounding shift-right-narrow into the high
   half (SQRSHRUN2).  Macros because the shift amount C must be an
   immediate.  */

/* vqrshrun_high_n_s16: 8x s16 >> C -> high 8x u8 lanes.  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrun_high_n_s32: 4x s32 >> C -> high 4x u16 lanes.  */
#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqrshrun_high_n_s64: 2x s64 >> C -> high 2x u32 lanes.  */
#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating shift-right-narrow into the high half (SQSHRN2/UQSHRN2),
   without rounding.  Macros because the shift amount C must be an
   immediate.  */

/* vqshrn_high_n_s16: 8x s16 >> C -> high 8x s8 lanes.  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_s32: 4x s32 >> C -> high 4x s16 lanes.  */
#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_s64: 2x s64 >> C -> high 2x s32 lanes.  */
#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_u16: 8x u16 >> C -> high 8x u8 lanes.  */
#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_u32: 4x u32 >> C -> high 4x u16 lanes.  */
#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrn_high_n_u64: 2x u64 >> C -> high 2x u32 lanes.  */
#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Signed-to-unsigned saturating shift-right-narrow into the high half
   (SQSHRUN2), without rounding.  Macros because the shift amount C must be
   an immediate.  */

/* vqshrun_high_n_s16: 8x s16 >> C -> high 8x u8 lanes.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrun_high_n_s32: 4x s32 >> C -> high 4x u16 lanes.  */
#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vqshrun_high_n_s64: 2x s64 >> C -> high 2x u32 lanes.  */
#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Rounding shift-right-narrow into the high half (RSHRN2), no saturation.
   Macros because the shift amount C must be an immediate.  */

/* vrshrn_high_n_s16: 8x s16 >> C (rounded) -> high 8x s8 lanes.  */
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_high_n_s32: 4x s32 >> C (rounded) -> high 4x s16 lanes.  */
#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_high_n_s64: 2x s64 >> C (rounded) -> high 2x s32 lanes.  */
#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_high_n_u16: 8x u16 >> C (rounded) -> high 8x u8 lanes.  */
#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_high_n_u32: 4x u32 >> C (rounded) -> high 4x u16 lanes.  */
#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_high_n_u64: 2x u64 >> C (rounded) -> high 2x u32 lanes.  */
#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Rounding shift-right-narrow (RSHRN): shift each element of A right by
   the immediate B with rounding and narrow to half width.  Macros because
   B must be an immediate.  */

/* vrshrn_n_s16: 8x s16 >> B (rounded) -> 8x s8.  */
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_n_s32: 4x s32 >> B (rounded) -> 4x s16.  */
#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_n_s64: 2x s64 >> B (rounded) -> 2x s32.  */
#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_n_u16: 8x u16 >> B (rounded) -> 8x u8.  */
#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_n_u32: 4x u32 >> B (rounded) -> 4x u16.  */
#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

/* vrshrn_n_u64: 2x u64 >> B (rounded) -> 2x u32.  */
#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
9682 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9683 vrsqrte_f32 (float32x2_t a)
9685 float32x2_t result;
9686 __asm__ ("frsqrte %0.2s,%1.2s"
9687 : "=w"(result)
9688 : "w"(a)
9689 : /* No clobbers */);
9690 return result;
9693 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
9694 vrsqrte_f64 (float64x1_t a)
9696 float64x1_t result;
9697 __asm__ ("frsqrte %d0,%d1"
9698 : "=w"(result)
9699 : "w"(a)
9700 : /* No clobbers */);
9701 return result;
9704 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9705 vrsqrte_u32 (uint32x2_t a)
9707 uint32x2_t result;
9708 __asm__ ("ursqrte %0.2s,%1.2s"
9709 : "=w"(result)
9710 : "w"(a)
9711 : /* No clobbers */);
9712 return result;
9715 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9716 vrsqrted_f64 (float64_t a)
9718 float64_t result;
9719 __asm__ ("frsqrte %d0,%d1"
9720 : "=w"(result)
9721 : "w"(a)
9722 : /* No clobbers */);
9723 return result;
9726 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9727 vrsqrteq_f32 (float32x4_t a)
9729 float32x4_t result;
9730 __asm__ ("frsqrte %0.4s,%1.4s"
9731 : "=w"(result)
9732 : "w"(a)
9733 : /* No clobbers */);
9734 return result;
9737 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9738 vrsqrteq_f64 (float64x2_t a)
9740 float64x2_t result;
9741 __asm__ ("frsqrte %0.2d,%1.2d"
9742 : "=w"(result)
9743 : "w"(a)
9744 : /* No clobbers */);
9745 return result;
9748 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9749 vrsqrteq_u32 (uint32x4_t a)
9751 uint32x4_t result;
9752 __asm__ ("ursqrte %0.4s,%1.4s"
9753 : "=w"(result)
9754 : "w"(a)
9755 : /* No clobbers */);
9756 return result;
9759 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9760 vrsqrtes_f32 (float32_t a)
9762 float32_t result;
9763 __asm__ ("frsqrte %s0,%s1"
9764 : "=w"(result)
9765 : "w"(a)
9766 : /* No clobbers */);
9767 return result;
9770 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9771 vrsqrts_f32 (float32x2_t a, float32x2_t b)
9773 float32x2_t result;
9774 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
9775 : "=w"(result)
9776 : "w"(a), "w"(b)
9777 : /* No clobbers */);
9778 return result;
9781 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9782 vrsqrtsd_f64 (float64_t a, float64_t b)
9784 float64_t result;
9785 __asm__ ("frsqrts %d0,%d1,%d2"
9786 : "=w"(result)
9787 : "w"(a), "w"(b)
9788 : /* No clobbers */);
9789 return result;
9792 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9793 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
9795 float32x4_t result;
9796 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
9797 : "=w"(result)
9798 : "w"(a), "w"(b)
9799 : /* No clobbers */);
9800 return result;
9803 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9804 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
9806 float64x2_t result;
9807 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
9808 : "=w"(result)
9809 : "w"(a), "w"(b)
9810 : /* No clobbers */);
9811 return result;
9814 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9815 vrsqrtss_f32 (float32_t a, float32_t b)
9817 float32_t result;
9818 __asm__ ("frsqrts %s0,%s1,%s2"
9819 : "=w"(result)
9820 : "w"(a), "w"(b)
9821 : /* No clobbers */);
9822 return result;
/* Narrowing shift right into the high half (SHRN2).  The low half of the
   result is the existing narrow vector A; the high half is B shifted right
   by the immediate C and narrowed.  Implemented as statement-expression
   macros so the shift amount stays a compile-time immediate.  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Narrowing shift right (SHRN): shift each element of A right by the
   immediate B and narrow to half-width elements.  */

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Shift left and insert (SLI) for the polynomial types.  The destination
   is tied to A via the "0" constraint; B shifted left by C is inserted
   into it.  */

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Shift right and insert (SRI) for the polynomial types.  Mirror of the
   vsli_n_* macros above.  */

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10091 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10092 vtst_p8 (poly8x8_t a, poly8x8_t b)
10094 uint8x8_t result;
10095 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
10096 : "=w"(result)
10097 : "w"(a), "w"(b)
10098 : /* No clobbers */);
10099 return result;
10102 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10103 vtst_p16 (poly16x4_t a, poly16x4_t b)
10105 uint16x4_t result;
10106 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
10107 : "=w"(result)
10108 : "w"(a), "w"(b)
10109 : /* No clobbers */);
10110 return result;
10113 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10114 vtstq_p8 (poly8x16_t a, poly8x16_t b)
10116 uint8x16_t result;
10117 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
10118 : "=w"(result)
10119 : "w"(a), "w"(b)
10120 : /* No clobbers */);
10121 return result;
10124 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10125 vtstq_p16 (poly16x8_t a, poly16x8_t b)
10127 uint16x8_t result;
10128 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
10129 : "=w"(result)
10130 : "w"(a), "w"(b)
10131 : /* No clobbers */);
10132 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | Y  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | N  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
10184 #define __STRUCTN(t, sz, nelem) \
10185 typedef struct t ## sz ## x ## nelem ## _t { \
10186 t ## sz ## _t val[nelem]; \
10187 } t ## sz ## x ## nelem ## _t;
10189 /* 2-element structs. */
10190 __STRUCTN (int, 8, 2)
10191 __STRUCTN (int, 16, 2)
10192 __STRUCTN (uint, 8, 2)
10193 __STRUCTN (uint, 16, 2)
10194 __STRUCTN (float, 16, 2)
10195 __STRUCTN (poly, 8, 2)
10196 __STRUCTN (poly, 16, 2)
10197 /* 3-element structs. */
10198 __STRUCTN (int, 8, 3)
10199 __STRUCTN (int, 16, 3)
10200 __STRUCTN (int, 32, 3)
10201 __STRUCTN (int, 64, 3)
10202 __STRUCTN (uint, 8, 3)
10203 __STRUCTN (uint, 16, 3)
10204 __STRUCTN (uint, 32, 3)
10205 __STRUCTN (uint, 64, 3)
10206 __STRUCTN (float, 16, 3)
10207 __STRUCTN (float, 32, 3)
10208 __STRUCTN (float, 64, 3)
10209 __STRUCTN (poly, 8, 3)
10210 __STRUCTN (poly, 16, 3)
10211 /* 4-element structs. */
10212 __STRUCTN (int, 8, 4)
10213 __STRUCTN (int, 64, 4)
10214 __STRUCTN (uint, 8, 4)
10215 __STRUCTN (uint, 64, 4)
10216 __STRUCTN (poly, 8, 4)
10217 __STRUCTN (float, 64, 4)
10218 #undef __STRUCTN
10221 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \
10222 qmode, ptr_mode, funcsuffix, signedtype) \
10223 __extension__ static __inline void \
10224 __attribute__ ((__always_inline__)) \
10225 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
10226 intype __b, const int __c) \
10228 __builtin_aarch64_simd_oi __o; \
10229 largetype __temp; \
10230 __temp.val[0] \
10231 = vcombine_##funcsuffix (__b.val[0], \
10232 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10233 __temp.val[1] \
10234 = vcombine_##funcsuffix (__b.val[1], \
10235 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10236 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
10237 (signedtype) __temp.val[0], 0); \
10238 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
10239 (signedtype) __temp.val[1], 1); \
10240 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10241 __ptr, __o, __c); \
10244 __ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
10245 float16x8_t)
10246 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
10247 float32x4_t)
10248 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
10249 float64x2_t)
10250 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
10251 int8x16_t)
10252 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
10253 int16x8_t)
10254 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
10255 int8x16_t)
10256 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
10257 int16x8_t)
10258 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
10259 int32x4_t)
10260 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64,
10261 int64x2_t)
10262 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
10263 int8x16_t)
10264 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16,
10265 int16x8_t)
10266 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32,
10267 int32x4_t)
10268 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64,
10269 int64x2_t)
10271 #undef __ST2_LANE_FUNC
10272 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10273 __extension__ static __inline void \
10274 __attribute__ ((__always_inline__)) \
10275 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
10276 intype __b, const int __c) \
10278 union { intype __i; \
10279 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
10280 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10281 __ptr, __temp.__o, __c); \
10284 __ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
10285 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
10286 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
10287 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
10288 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
10289 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
10290 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
10291 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
10292 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
10293 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
10294 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
10295 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
10296 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
10298 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \
10299 qmode, ptr_mode, funcsuffix, signedtype) \
10300 __extension__ static __inline void \
10301 __attribute__ ((__always_inline__)) \
10302 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
10303 intype __b, const int __c) \
10305 __builtin_aarch64_simd_ci __o; \
10306 largetype __temp; \
10307 __temp.val[0] \
10308 = vcombine_##funcsuffix (__b.val[0], \
10309 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10310 __temp.val[1] \
10311 = vcombine_##funcsuffix (__b.val[1], \
10312 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10313 __temp.val[2] \
10314 = vcombine_##funcsuffix (__b.val[2], \
10315 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10316 __o = __builtin_aarch64_set_qregci##qmode (__o, \
10317 (signedtype) __temp.val[0], 0); \
10318 __o = __builtin_aarch64_set_qregci##qmode (__o, \
10319 (signedtype) __temp.val[1], 1); \
10320 __o = __builtin_aarch64_set_qregci##qmode (__o, \
10321 (signedtype) __temp.val[2], 2); \
10322 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10323 __ptr, __o, __c); \
10326 __ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
10327 float16x8_t)
10328 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
10329 float32x4_t)
10330 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
10331 float64x2_t)
10332 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
10333 int8x16_t)
10334 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
10335 int16x8_t)
10336 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
10337 int8x16_t)
10338 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
10339 int16x8_t)
10340 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
10341 int32x4_t)
10342 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64,
10343 int64x2_t)
10344 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
10345 int8x16_t)
10346 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16,
10347 int16x8_t)
10348 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32,
10349 int32x4_t)
10350 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64,
10351 int64x2_t)
10353 #undef __ST3_LANE_FUNC
10354 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10355 __extension__ static __inline void \
10356 __attribute__ ((__always_inline__)) \
10357 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
10358 intype __b, const int __c) \
10360 union { intype __i; \
10361 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
10362 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10363 __ptr, __temp.__o, __c); \
10366 __ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
10367 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
10368 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
10369 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
10370 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
10371 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
10372 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
10373 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
10374 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
10375 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
10376 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
10377 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
10378 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
10380 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \
10381 qmode, ptr_mode, funcsuffix, signedtype) \
10382 __extension__ static __inline void \
10383 __attribute__ ((__always_inline__)) \
10384 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
10385 intype __b, const int __c) \
10387 __builtin_aarch64_simd_xi __o; \
10388 largetype __temp; \
10389 __temp.val[0] \
10390 = vcombine_##funcsuffix (__b.val[0], \
10391 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10392 __temp.val[1] \
10393 = vcombine_##funcsuffix (__b.val[1], \
10394 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10395 __temp.val[2] \
10396 = vcombine_##funcsuffix (__b.val[2], \
10397 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10398 __temp.val[3] \
10399 = vcombine_##funcsuffix (__b.val[3], \
10400 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10401 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
10402 (signedtype) __temp.val[0], 0); \
10403 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
10404 (signedtype) __temp.val[1], 1); \
10405 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
10406 (signedtype) __temp.val[2], 2); \
10407 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
10408 (signedtype) __temp.val[3], 3); \
10409 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10410 __ptr, __o, __c); \
10413 __ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
10414 float16x8_t)
10415 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
10416 float32x4_t)
10417 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
10418 float64x2_t)
10419 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
10420 int8x16_t)
10421 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
10422 int16x8_t)
10423 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
10424 int8x16_t)
10425 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
10426 int16x8_t)
10427 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
10428 int32x4_t)
10429 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64,
10430 int64x2_t)
10431 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
10432 int8x16_t)
10433 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16,
10434 int16x8_t)
10435 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32,
10436 int32x4_t)
10437 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64,
10438 int64x2_t)
10440 #undef __ST4_LANE_FUNC
10441 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10442 __extension__ static __inline void \
10443 __attribute__ ((__always_inline__)) \
10444 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
10445 intype __b, const int __c) \
10447 union { intype __i; \
10448 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
10449 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10450 __ptr, __temp.__o, __c); \
10453 __ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
10454 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
10455 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
10456 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
10457 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
10458 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
10459 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
10460 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
10461 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
10462 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
10463 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
10464 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
10465 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
10467 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10468 vaddlv_s32 (int32x2_t a)
10470 int64_t result;
10471 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10472 return result;
10475 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10476 vaddlv_u32 (uint32x2_t a)
10478 uint64_t result;
10479 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10480 return result;
10483 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10484 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
10486 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
10489 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10490 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
10492 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
10495 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10496 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
10498 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
10501 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10502 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
10504 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
10507 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10508 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
10510 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
10513 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10514 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
10516 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
10519 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10520 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
10522 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
10525 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10526 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
10528 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
10531 /* Table intrinsics. */
10533 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10534 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
10536 poly8x8_t result;
10537 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10538 : "=w"(result)
10539 : "w"(a), "w"(b)
10540 : /* No clobbers */);
10541 return result;
10544 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10545 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
10547 int8x8_t result;
10548 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10549 : "=w"(result)
10550 : "w"(a), "w"(b)
10551 : /* No clobbers */);
10552 return result;
10555 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10556 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
10558 uint8x8_t result;
10559 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10560 : "=w"(result)
10561 : "w"(a), "w"(b)
10562 : /* No clobbers */);
10563 return result;
10566 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10567 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
10569 poly8x16_t result;
10570 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
10571 : "=w"(result)
10572 : "w"(a), "w"(b)
10573 : /* No clobbers */);
10574 return result;
10577 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10578 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
10580 int8x16_t result;
10581 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
10582 : "=w"(result)
10583 : "w"(a), "w"(b)
10584 : /* No clobbers */);
10585 return result;
10588 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10589 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
10591 uint8x16_t result;
10592 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
10593 : "=w"(result)
10594 : "w"(a), "w"(b)
10595 : /* No clobbers */);
10596 return result;
10599 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10600 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
10602 int8x8_t result = r;
10603 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
10604 : "+w"(result)
10605 : "w"(tab), "w"(idx)
10606 : /* No clobbers */);
10607 return result;
10610 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10611 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
10613 uint8x8_t result = r;
10614 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
10615 : "+w"(result)
10616 : "w"(tab), "w"(idx)
10617 : /* No clobbers */);
10618 return result;
10621 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10622 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
10624 poly8x8_t result = r;
10625 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
10626 : "+w"(result)
10627 : "w"(tab), "w"(idx)
10628 : /* No clobbers */);
10629 return result;
10632 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10633 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
10635 int8x16_t result = r;
10636 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
10637 : "+w"(result)
10638 : "w"(tab), "w"(idx)
10639 : /* No clobbers */);
10640 return result;
10643 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10644 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
10646 uint8x16_t result = r;
10647 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
10648 : "+w"(result)
10649 : "w"(tab), "w"(idx)
10650 : /* No clobbers */);
10651 return result;
10654 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10655 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
10657 poly8x16_t result = r;
10658 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
10659 : "+w"(result)
10660 : "w"(tab), "w"(idx)
10661 : /* No clobbers */);
10662 return result;
10665 /* V7 legacy table intrinsics. */
10667 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10668 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
10670 int8x8_t result;
10671 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10672 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10673 : "=w"(result)
10674 : "w"(temp), "w"(idx)
10675 : /* No clobbers */);
10676 return result;
10679 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10680 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
10682 uint8x8_t result;
10683 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10684 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10685 : "=w"(result)
10686 : "w"(temp), "w"(idx)
10687 : /* No clobbers */);
10688 return result;
10691 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10692 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
10694 poly8x8_t result;
10695 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
10696 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10697 : "=w"(result)
10698 : "w"(temp), "w"(idx)
10699 : /* No clobbers */);
10700 return result;
10703 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10704 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
10706 int8x8_t result;
10707 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
10708 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10709 : "=w"(result)
10710 : "w"(temp), "w"(idx)
10711 : /* No clobbers */);
10712 return result;
10715 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10716 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
10718 uint8x8_t result;
10719 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
10720 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10721 : "=w"(result)
10722 : "w"(temp), "w"(idx)
10723 : /* No clobbers */);
10724 return result;
10727 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10728 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
10730 poly8x8_t result;
10731 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
10732 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10733 : "=w"(result)
10734 : "w"(temp), "w"(idx)
10735 : /* No clobbers */);
10736 return result;
10739 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10740 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
10742 int8x8_t result;
10743 int8x16x2_t temp;
10744 __builtin_aarch64_simd_oi __o;
10745 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
10746 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10747 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10748 (int8x16_t) temp.val[0], 0);
10749 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10750 (int8x16_t) temp.val[1], 1);
10751 result = __builtin_aarch64_tbl3v8qi (__o, idx);
10752 return result;
10755 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10756 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
10758 uint8x8_t result;
10759 uint8x16x2_t temp;
10760 __builtin_aarch64_simd_oi __o;
10761 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
10762 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10763 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10764 (int8x16_t) temp.val[0], 0);
10765 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10766 (int8x16_t) temp.val[1], 1);
10767 result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
10768 return result;
10771 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10772 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
10774 poly8x8_t result;
10775 poly8x16x2_t temp;
10776 __builtin_aarch64_simd_oi __o;
10777 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
10778 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
10779 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10780 (int8x16_t) temp.val[0], 0);
10781 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10782 (int8x16_t) temp.val[1], 1);
10783 result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
10784 return result;
10787 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10788 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
10790 int8x8_t result;
10791 int8x16x2_t temp;
10792 __builtin_aarch64_simd_oi __o;
10793 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
10794 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
10795 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10796 (int8x16_t) temp.val[0], 0);
10797 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10798 (int8x16_t) temp.val[1], 1);
10799 result = __builtin_aarch64_tbl3v8qi (__o, idx);
10800 return result;
10803 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10804 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
10806 uint8x8_t result;
10807 uint8x16x2_t temp;
10808 __builtin_aarch64_simd_oi __o;
10809 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
10810 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
10811 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10812 (int8x16_t) temp.val[0], 0);
10813 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10814 (int8x16_t) temp.val[1], 1);
10815 result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
10816 return result;
10819 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10820 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
10822 poly8x8_t result;
10823 poly8x16x2_t temp;
10824 __builtin_aarch64_simd_oi __o;
10825 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
10826 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
10827 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10828 (int8x16_t) temp.val[0], 0);
10829 __o = __builtin_aarch64_set_qregoiv16qi (__o,
10830 (int8x16_t) temp.val[1], 1);
10831 result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
10832 return result;
10835 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10836 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
10838 int8x8_t result = r;
10839 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
10840 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
10841 : "+w"(result)
10842 : "w"(temp), "w"(idx)
10843 : /* No clobbers */);
10844 return result;
10847 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10848 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
10850 uint8x8_t result = r;
10851 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
10852 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
10853 : "+w"(result)
10854 : "w"(temp), "w"(idx)
10855 : /* No clobbers */);
10856 return result;
10859 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10860 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
10862 poly8x8_t result = r;
10863 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
10864 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
10865 : "+w"(result)
10866 : "w"(temp), "w"(idx)
10867 : /* No clobbers */);
10868 return result;
10871 /* End of temporary inline asm. */
10873 /* Start of optimal implementations in approved order. */
10875 /* vabs */
10877 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10878 vabs_f32 (float32x2_t __a)
10880 return __builtin_aarch64_absv2sf (__a);
10883 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10884 vabs_f64 (float64x1_t __a)
10886 return (float64x1_t) {__builtin_fabs (__a[0])};
10889 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10890 vabs_s8 (int8x8_t __a)
10892 return __builtin_aarch64_absv8qi (__a);
10895 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10896 vabs_s16 (int16x4_t __a)
10898 return __builtin_aarch64_absv4hi (__a);
10901 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10902 vabs_s32 (int32x2_t __a)
10904 return __builtin_aarch64_absv2si (__a);
10907 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10908 vabs_s64 (int64x1_t __a)
10910 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
10913 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10914 vabsq_f32 (float32x4_t __a)
10916 return __builtin_aarch64_absv4sf (__a);
10919 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10920 vabsq_f64 (float64x2_t __a)
10922 return __builtin_aarch64_absv2df (__a);
10925 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10926 vabsq_s8 (int8x16_t __a)
10928 return __builtin_aarch64_absv16qi (__a);
10931 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10932 vabsq_s16 (int16x8_t __a)
10934 return __builtin_aarch64_absv8hi (__a);
10937 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10938 vabsq_s32 (int32x4_t __a)
10940 return __builtin_aarch64_absv4si (__a);
10943 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10944 vabsq_s64 (int64x2_t __a)
10946 return __builtin_aarch64_absv2di (__a);
10949 /* vadd */
10951 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10952 vaddd_s64 (int64_t __a, int64_t __b)
10954 return __a + __b;
10957 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10958 vaddd_u64 (uint64_t __a, uint64_t __b)
10960 return __a + __b;
10963 /* vaddv */
10965 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
10966 vaddv_s8 (int8x8_t __a)
10968 return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
10971 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10972 vaddv_s16 (int16x4_t __a)
10974 return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
10977 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10978 vaddv_s32 (int32x2_t __a)
10980 return __builtin_aarch64_reduc_plus_scal_v2si (__a);
10983 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10984 vaddv_u8 (uint8x8_t __a)
10986 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
10989 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10990 vaddv_u16 (uint16x4_t __a)
10992 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
10995 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10996 vaddv_u32 (uint32x2_t __a)
10998 return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
11001 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
11002 vaddvq_s8 (int8x16_t __a)
11004 return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
11007 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11008 vaddvq_s16 (int16x8_t __a)
11010 return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
11013 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11014 vaddvq_s32 (int32x4_t __a)
11016 return __builtin_aarch64_reduc_plus_scal_v4si (__a);
11019 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
11020 vaddvq_s64 (int64x2_t __a)
11022 return __builtin_aarch64_reduc_plus_scal_v2di (__a);
11025 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11026 vaddvq_u8 (uint8x16_t __a)
11028 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
11031 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11032 vaddvq_u16 (uint16x8_t __a)
11034 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
11037 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11038 vaddvq_u32 (uint32x4_t __a)
11040 return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
11043 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11044 vaddvq_u64 (uint64x2_t __a)
11046 return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
11049 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11050 vaddv_f32 (float32x2_t __a)
11052 return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
11055 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11056 vaddvq_f32 (float32x4_t __a)
11058 return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
11061 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11062 vaddvq_f64 (float64x2_t __a)
11064 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
11067 /* vbsl */
11069 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11070 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
11072 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
11075 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11076 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
11078 return (float64x1_t)
11079 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
11082 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11083 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
11085 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
11088 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11089 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
11091 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
11094 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11095 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
11097 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
11100 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11101 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
11103 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
11106 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11107 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
11109 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
11112 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11113 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
11115 return (int64x1_t)
11116 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
11119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11120 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
11122 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
11125 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11126 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
11128 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
11131 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11132 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
11134 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
11137 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11138 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
11140 return (uint64x1_t)
11141 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
11144 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11145 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
11147 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
11150 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11151 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
11153 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
11156 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11157 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
11159 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
11162 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11163 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
11165 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
11168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11169 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
11171 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
11174 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11175 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
11177 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
11180 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11181 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
11183 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
11186 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11187 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
11189 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
11192 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11193 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
11195 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
11198 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11199 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
11201 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
11204 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11205 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
11207 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
11210 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11211 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
11213 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
11216 /* ARMv8.1 instrinsics. */
11217 #pragma GCC push_options
11218 #pragma GCC target ("arch=armv8.1-a")
11220 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11221 vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
11223 return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
11226 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11227 vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
11229 return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
11232 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11233 vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
11235 return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
11238 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11239 vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
11241 return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
11244 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11245 vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
11247 return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
11250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11251 vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
11253 return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
11256 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11257 vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
11259 return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
11262 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11263 vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
11265 return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
11268 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11269 vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
11271 return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
11274 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11275 vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
11277 return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
11280 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11281 vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
11283 return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
11286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11287 vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
11289 return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
11292 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11293 vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
11295 return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
11298 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11299 vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
11301 return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
11304 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11305 vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
11307 return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
11310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11311 vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
11313 return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
11316 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11317 vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
11319 return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
11322 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11323 vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
11325 return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
11328 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11329 vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
11331 return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
11334 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11335 vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
11337 return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
11340 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11341 vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
11343 return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
11346 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11347 vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
11349 return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
11352 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11353 vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
11355 return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
11358 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11359 vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
11361 return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
11364 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11365 vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
11367 return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
11370 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11371 vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
11373 return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
11376 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11377 vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
11379 return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
11382 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11383 vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
11385 return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
11388 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11389 vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
11391 return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
11394 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11395 vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
11397 return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
11400 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11401 vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
11403 return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
11406 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11407 vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
11409 return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
11412 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11413 vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
11415 return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
11418 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11419 vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
11421 return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
11424 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11425 vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
11427 return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
11430 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11431 vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
11433 return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
11435 #pragma GCC pop_options
11437 #pragma GCC push_options
11438 #pragma GCC target ("+nothing+crypto")
11439 /* vaes */
11441 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11442 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
11444 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
11447 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11448 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
11450 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
11453 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11454 vaesmcq_u8 (uint8x16_t data)
11456 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
11459 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11460 vaesimcq_u8 (uint8x16_t data)
11462 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
11464 #pragma GCC pop_options
11466 /* vcage */
11468 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11469 vcage_f64 (float64x1_t __a, float64x1_t __b)
11471 return vabs_f64 (__a) >= vabs_f64 (__b);
11474 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11475 vcages_f32 (float32_t __a, float32_t __b)
11477 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
11480 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11481 vcage_f32 (float32x2_t __a, float32x2_t __b)
11483 return vabs_f32 (__a) >= vabs_f32 (__b);
11486 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11487 vcageq_f32 (float32x4_t __a, float32x4_t __b)
11489 return vabsq_f32 (__a) >= vabsq_f32 (__b);
11492 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11493 vcaged_f64 (float64_t __a, float64_t __b)
11495 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
11498 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11499 vcageq_f64 (float64x2_t __a, float64x2_t __b)
11501 return vabsq_f64 (__a) >= vabsq_f64 (__b);
11504 /* vcagt */
11506 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11507 vcagts_f32 (float32_t __a, float32_t __b)
11509 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
11512 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11513 vcagt_f32 (float32x2_t __a, float32x2_t __b)
11515 return vabs_f32 (__a) > vabs_f32 (__b);
11518 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11519 vcagt_f64 (float64x1_t __a, float64x1_t __b)
11521 return vabs_f64 (__a) > vabs_f64 (__b);
11524 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11525 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
11527 return vabsq_f32 (__a) > vabsq_f32 (__b);
11530 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11531 vcagtd_f64 (float64_t __a, float64_t __b)
11533 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
11536 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11537 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
11539 return vabsq_f64 (__a) > vabsq_f64 (__b);
11542 /* vcale */
11544 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11545 vcale_f32 (float32x2_t __a, float32x2_t __b)
11547 return vabs_f32 (__a) <= vabs_f32 (__b);
11550 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11551 vcale_f64 (float64x1_t __a, float64x1_t __b)
11553 return vabs_f64 (__a) <= vabs_f64 (__b);
11556 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11557 vcaled_f64 (float64_t __a, float64_t __b)
11559 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
11562 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11563 vcales_f32 (float32_t __a, float32_t __b)
11565 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
11568 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11569 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
11571 return vabsq_f32 (__a) <= vabsq_f32 (__b);
11574 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11575 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
11577 return vabsq_f64 (__a) <= vabsq_f64 (__b);
11580 /* vcalt */
11582 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11583 vcalt_f32 (float32x2_t __a, float32x2_t __b)
11585 return vabs_f32 (__a) < vabs_f32 (__b);
11588 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11589 vcalt_f64 (float64x1_t __a, float64x1_t __b)
11591 return vabs_f64 (__a) < vabs_f64 (__b);
11594 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11595 vcaltd_f64 (float64_t __a, float64_t __b)
11597 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
11600 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11601 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
11603 return vabsq_f32 (__a) < vabsq_f32 (__b);
11606 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11607 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
11609 return vabsq_f64 (__a) < vabsq_f64 (__b);
11612 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11613 vcalts_f32 (float32_t __a, float32_t __b)
11615 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
11618 /* vceq - vector. */
11620 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11621 vceq_f32 (float32x2_t __a, float32x2_t __b)
11623 return (uint32x2_t) (__a == __b);
11626 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11627 vceq_f64 (float64x1_t __a, float64x1_t __b)
11629 return (uint64x1_t) (__a == __b);
11632 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11633 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
11635 return (uint8x8_t) (__a == __b);
11638 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11639 vceq_s8 (int8x8_t __a, int8x8_t __b)
11641 return (uint8x8_t) (__a == __b);
11644 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11645 vceq_s16 (int16x4_t __a, int16x4_t __b)
11647 return (uint16x4_t) (__a == __b);
11650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11651 vceq_s32 (int32x2_t __a, int32x2_t __b)
11653 return (uint32x2_t) (__a == __b);
11656 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11657 vceq_s64 (int64x1_t __a, int64x1_t __b)
11659 return (uint64x1_t) (__a == __b);
11662 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11663 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
11665 return (__a == __b);
11668 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11669 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
11671 return (__a == __b);
11674 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11675 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
11677 return (__a == __b);
11680 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11681 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
11683 return (__a == __b);
11686 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11687 vceqq_f32 (float32x4_t __a, float32x4_t __b)
11689 return (uint32x4_t) (__a == __b);
11692 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11693 vceqq_f64 (float64x2_t __a, float64x2_t __b)
11695 return (uint64x2_t) (__a == __b);
11698 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11699 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
11701 return (uint8x16_t) (__a == __b);
11704 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11705 vceqq_s8 (int8x16_t __a, int8x16_t __b)
11707 return (uint8x16_t) (__a == __b);
11710 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11711 vceqq_s16 (int16x8_t __a, int16x8_t __b)
11713 return (uint16x8_t) (__a == __b);
11716 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11717 vceqq_s32 (int32x4_t __a, int32x4_t __b)
11719 return (uint32x4_t) (__a == __b);
11722 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11723 vceqq_s64 (int64x2_t __a, int64x2_t __b)
11725 return (uint64x2_t) (__a == __b);
11728 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11729 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
11731 return (__a == __b);
11734 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11735 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
11737 return (__a == __b);
11740 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11741 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
11743 return (__a == __b);
11746 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11747 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
11749 return (__a == __b);
11752 /* vceq - scalar. */
11754 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11755 vceqs_f32 (float32_t __a, float32_t __b)
11757 return __a == __b ? -1 : 0;
11760 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11761 vceqd_s64 (int64_t __a, int64_t __b)
11763 return __a == __b ? -1ll : 0ll;
11766 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11767 vceqd_u64 (uint64_t __a, uint64_t __b)
11769 return __a == __b ? -1ll : 0ll;
11772 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11773 vceqd_f64 (float64_t __a, float64_t __b)
11775 return __a == __b ? -1ll : 0ll;
11778 /* vceqz - vector. */
11780 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11781 vceqz_f32 (float32x2_t __a)
11783 return (uint32x2_t) (__a == 0.0f);
11786 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11787 vceqz_f64 (float64x1_t __a)
11789 return (uint64x1_t) (__a == (float64x1_t) {0.0});
11792 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11793 vceqz_p8 (poly8x8_t __a)
11795 return (uint8x8_t) (__a == 0);
11798 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11799 vceqz_s8 (int8x8_t __a)
11801 return (uint8x8_t) (__a == 0);
11804 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11805 vceqz_s16 (int16x4_t __a)
11807 return (uint16x4_t) (__a == 0);
11810 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11811 vceqz_s32 (int32x2_t __a)
11813 return (uint32x2_t) (__a == 0);
11816 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11817 vceqz_s64 (int64x1_t __a)
11819 return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
11822 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11823 vceqz_u8 (uint8x8_t __a)
11825 return (__a == 0);
11828 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11829 vceqz_u16 (uint16x4_t __a)
11831 return (__a == 0);
11834 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11835 vceqz_u32 (uint32x2_t __a)
11837 return (__a == 0);
11840 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11841 vceqz_u64 (uint64x1_t __a)
11843 return (__a == __AARCH64_UINT64_C (0));
11846 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11847 vceqzq_f32 (float32x4_t __a)
11849 return (uint32x4_t) (__a == 0.0f);
11852 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11853 vceqzq_f64 (float64x2_t __a)
11855 return (uint64x2_t) (__a == 0.0f);
11858 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11859 vceqzq_p8 (poly8x16_t __a)
11861 return (uint8x16_t) (__a == 0);
11864 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11865 vceqzq_s8 (int8x16_t __a)
11867 return (uint8x16_t) (__a == 0);
11870 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11871 vceqzq_s16 (int16x8_t __a)
11873 return (uint16x8_t) (__a == 0);
11876 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11877 vceqzq_s32 (int32x4_t __a)
11879 return (uint32x4_t) (__a == 0);
11882 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11883 vceqzq_s64 (int64x2_t __a)
11885 return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
11888 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11889 vceqzq_u8 (uint8x16_t __a)
11891 return (__a == 0);
11894 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11895 vceqzq_u16 (uint16x8_t __a)
11897 return (__a == 0);
11900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11901 vceqzq_u32 (uint32x4_t __a)
11903 return (__a == 0);
11906 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11907 vceqzq_u64 (uint64x2_t __a)
11909 return (__a == __AARCH64_UINT64_C (0));
11912 /* vceqz - scalar. */
11914 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11915 vceqzs_f32 (float32_t __a)
11917 return __a == 0.0f ? -1 : 0;
11920 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11921 vceqzd_s64 (int64_t __a)
11923 return __a == 0 ? -1ll : 0ll;
11926 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11927 vceqzd_u64 (uint64_t __a)
11929 return __a == 0 ? -1ll : 0ll;
11932 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11933 vceqzd_f64 (float64_t __a)
11935 return __a == 0.0 ? -1ll : 0ll;
11938 /* vcge - vector. */
11940 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11941 vcge_f32 (float32x2_t __a, float32x2_t __b)
11943 return (uint32x2_t) (__a >= __b);
11946 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11947 vcge_f64 (float64x1_t __a, float64x1_t __b)
11949 return (uint64x1_t) (__a >= __b);
11952 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11953 vcge_s8 (int8x8_t __a, int8x8_t __b)
11955 return (uint8x8_t) (__a >= __b);
11958 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11959 vcge_s16 (int16x4_t __a, int16x4_t __b)
11961 return (uint16x4_t) (__a >= __b);
11964 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11965 vcge_s32 (int32x2_t __a, int32x2_t __b)
11967 return (uint32x2_t) (__a >= __b);
11970 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11971 vcge_s64 (int64x1_t __a, int64x1_t __b)
11973 return (uint64x1_t) (__a >= __b);
11976 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11977 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
11979 return (__a >= __b);
11982 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11983 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
11985 return (__a >= __b);
11988 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11989 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
11991 return (__a >= __b);
11994 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11995 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
11997 return (__a >= __b);
12000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12001 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
12003 return (uint32x4_t) (__a >= __b);
12006 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12007 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
12009 return (uint64x2_t) (__a >= __b);
12012 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12013 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
12015 return (uint8x16_t) (__a >= __b);
12018 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12019 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
12021 return (uint16x8_t) (__a >= __b);
12024 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12025 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
12027 return (uint32x4_t) (__a >= __b);
12030 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12031 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
12033 return (uint64x2_t) (__a >= __b);
12036 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12037 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
12039 return (__a >= __b);
12042 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12043 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
12045 return (__a >= __b);
12048 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12049 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
12051 return (__a >= __b);
12054 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12055 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
12057 return (__a >= __b);
12060 /* vcge - scalar. */
12062 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12063 vcges_f32 (float32_t __a, float32_t __b)
12065 return __a >= __b ? -1 : 0;
12068 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12069 vcged_s64 (int64_t __a, int64_t __b)
12071 return __a >= __b ? -1ll : 0ll;
12074 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12075 vcged_u64 (uint64_t __a, uint64_t __b)
12077 return __a >= __b ? -1ll : 0ll;
12080 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12081 vcged_f64 (float64_t __a, float64_t __b)
12083 return __a >= __b ? -1ll : 0ll;
12086 /* vcgez - vector. */
12088 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12089 vcgez_f32 (float32x2_t __a)
12091 return (uint32x2_t) (__a >= 0.0f);
12094 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12095 vcgez_f64 (float64x1_t __a)
12097 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
12100 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12101 vcgez_s8 (int8x8_t __a)
12103 return (uint8x8_t) (__a >= 0);
12106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12107 vcgez_s16 (int16x4_t __a)
12109 return (uint16x4_t) (__a >= 0);
12112 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12113 vcgez_s32 (int32x2_t __a)
12115 return (uint32x2_t) (__a >= 0);
12118 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12119 vcgez_s64 (int64x1_t __a)
12121 return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
12124 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12125 vcgezq_f32 (float32x4_t __a)
12127 return (uint32x4_t) (__a >= 0.0f);
12130 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12131 vcgezq_f64 (float64x2_t __a)
12133 return (uint64x2_t) (__a >= 0.0);
12136 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12137 vcgezq_s8 (int8x16_t __a)
12139 return (uint8x16_t) (__a >= 0);
12142 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12143 vcgezq_s16 (int16x8_t __a)
12145 return (uint16x8_t) (__a >= 0);
12148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12149 vcgezq_s32 (int32x4_t __a)
12151 return (uint32x4_t) (__a >= 0);
12154 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12155 vcgezq_s64 (int64x2_t __a)
12157 return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
12160 /* vcgez - scalar. */
12162 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12163 vcgezs_f32 (float32_t __a)
12165 return __a >= 0.0f ? -1 : 0;
12168 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12169 vcgezd_s64 (int64_t __a)
12171 return __a >= 0 ? -1ll : 0ll;
12174 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12175 vcgezd_f64 (float64_t __a)
12177 return __a >= 0.0 ? -1ll : 0ll;
12180 /* vcgt - vector. */
12182 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12183 vcgt_f32 (float32x2_t __a, float32x2_t __b)
12185 return (uint32x2_t) (__a > __b);
12188 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12189 vcgt_f64 (float64x1_t __a, float64x1_t __b)
12191 return (uint64x1_t) (__a > __b);
12194 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12195 vcgt_s8 (int8x8_t __a, int8x8_t __b)
12197 return (uint8x8_t) (__a > __b);
12200 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12201 vcgt_s16 (int16x4_t __a, int16x4_t __b)
12203 return (uint16x4_t) (__a > __b);
12206 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12207 vcgt_s32 (int32x2_t __a, int32x2_t __b)
12209 return (uint32x2_t) (__a > __b);
12212 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12213 vcgt_s64 (int64x1_t __a, int64x1_t __b)
12215 return (uint64x1_t) (__a > __b);
12218 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12219 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
12221 return (__a > __b);
12224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12225 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
12227 return (__a > __b);
12230 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12231 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
12233 return (__a > __b);
12236 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12237 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
12239 return (__a > __b);
12242 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12243 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
12245 return (uint32x4_t) (__a > __b);
12248 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12249 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
12251 return (uint64x2_t) (__a > __b);
12254 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12255 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
12257 return (uint8x16_t) (__a > __b);
12260 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12261 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
12263 return (uint16x8_t) (__a > __b);
12266 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12267 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
12269 return (uint32x4_t) (__a > __b);
12272 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12273 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
12275 return (uint64x2_t) (__a > __b);
12278 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12279 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
12281 return (__a > __b);
12284 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12285 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
12287 return (__a > __b);
12290 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12291 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
12293 return (__a > __b);
12296 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12297 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
12299 return (__a > __b);
12302 /* vcgt - scalar. */
12304 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12305 vcgts_f32 (float32_t __a, float32_t __b)
12307 return __a > __b ? -1 : 0;
12310 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12311 vcgtd_s64 (int64_t __a, int64_t __b)
12313 return __a > __b ? -1ll : 0ll;
12316 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12317 vcgtd_u64 (uint64_t __a, uint64_t __b)
12319 return __a > __b ? -1ll : 0ll;
12322 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12323 vcgtd_f64 (float64_t __a, float64_t __b)
12325 return __a > __b ? -1ll : 0ll;
12328 /* vcgtz - vector. */
12330 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12331 vcgtz_f32 (float32x2_t __a)
12333 return (uint32x2_t) (__a > 0.0f);
12336 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12337 vcgtz_f64 (float64x1_t __a)
12339 return (uint64x1_t) (__a > (float64x1_t) {0.0});
12342 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12343 vcgtz_s8 (int8x8_t __a)
12345 return (uint8x8_t) (__a > 0);
12348 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12349 vcgtz_s16 (int16x4_t __a)
12351 return (uint16x4_t) (__a > 0);
12354 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12355 vcgtz_s32 (int32x2_t __a)
12357 return (uint32x2_t) (__a > 0);
12360 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12361 vcgtz_s64 (int64x1_t __a)
12363 return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
12366 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12367 vcgtzq_f32 (float32x4_t __a)
12369 return (uint32x4_t) (__a > 0.0f);
12372 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12373 vcgtzq_f64 (float64x2_t __a)
12375 return (uint64x2_t) (__a > 0.0);
12378 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12379 vcgtzq_s8 (int8x16_t __a)
12381 return (uint8x16_t) (__a > 0);
12384 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12385 vcgtzq_s16 (int16x8_t __a)
12387 return (uint16x8_t) (__a > 0);
12390 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12391 vcgtzq_s32 (int32x4_t __a)
12393 return (uint32x4_t) (__a > 0);
12396 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12397 vcgtzq_s64 (int64x2_t __a)
12399 return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
12402 /* vcgtz - scalar. */
12404 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12405 vcgtzs_f32 (float32_t __a)
12407 return __a > 0.0f ? -1 : 0;
12410 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12411 vcgtzd_s64 (int64_t __a)
12413 return __a > 0 ? -1ll : 0ll;
12416 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12417 vcgtzd_f64 (float64_t __a)
12419 return __a > 0.0 ? -1ll : 0ll;
12422 /* vcle - vector. */
12424 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12425 vcle_f32 (float32x2_t __a, float32x2_t __b)
12427 return (uint32x2_t) (__a <= __b);
12430 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12431 vcle_f64 (float64x1_t __a, float64x1_t __b)
12433 return (uint64x1_t) (__a <= __b);
12436 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12437 vcle_s8 (int8x8_t __a, int8x8_t __b)
12439 return (uint8x8_t) (__a <= __b);
12442 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12443 vcle_s16 (int16x4_t __a, int16x4_t __b)
12445 return (uint16x4_t) (__a <= __b);
12448 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12449 vcle_s32 (int32x2_t __a, int32x2_t __b)
12451 return (uint32x2_t) (__a <= __b);
12454 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12455 vcle_s64 (int64x1_t __a, int64x1_t __b)
12457 return (uint64x1_t) (__a <= __b);
12460 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12461 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
12463 return (__a <= __b);
12466 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12467 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
12469 return (__a <= __b);
12472 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12473 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
12475 return (__a <= __b);
12478 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12479 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
12481 return (__a <= __b);
12484 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12485 vcleq_f32 (float32x4_t __a, float32x4_t __b)
12487 return (uint32x4_t) (__a <= __b);
12490 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12491 vcleq_f64 (float64x2_t __a, float64x2_t __b)
12493 return (uint64x2_t) (__a <= __b);
12496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12497 vcleq_s8 (int8x16_t __a, int8x16_t __b)
12499 return (uint8x16_t) (__a <= __b);
12502 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12503 vcleq_s16 (int16x8_t __a, int16x8_t __b)
12505 return (uint16x8_t) (__a <= __b);
12508 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12509 vcleq_s32 (int32x4_t __a, int32x4_t __b)
12511 return (uint32x4_t) (__a <= __b);
12514 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12515 vcleq_s64 (int64x2_t __a, int64x2_t __b)
12517 return (uint64x2_t) (__a <= __b);
12520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12521 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
12523 return (__a <= __b);
12526 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12527 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
12529 return (__a <= __b);
12532 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12533 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
12535 return (__a <= __b);
12538 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12539 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
12541 return (__a <= __b);
12544 /* vcle - scalar. */
12546 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12547 vcles_f32 (float32_t __a, float32_t __b)
12549 return __a <= __b ? -1 : 0;
12552 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12553 vcled_s64 (int64_t __a, int64_t __b)
12555 return __a <= __b ? -1ll : 0ll;
12558 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12559 vcled_u64 (uint64_t __a, uint64_t __b)
12561 return __a <= __b ? -1ll : 0ll;
12564 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12565 vcled_f64 (float64_t __a, float64_t __b)
12567 return __a <= __b ? -1ll : 0ll;
12570 /* vclez - vector. */
12572 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12573 vclez_f32 (float32x2_t __a)
12575 return (uint32x2_t) (__a <= 0.0f);
12578 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12579 vclez_f64 (float64x1_t __a)
12581 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
12584 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12585 vclez_s8 (int8x8_t __a)
12587 return (uint8x8_t) (__a <= 0);
12590 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12591 vclez_s16 (int16x4_t __a)
12593 return (uint16x4_t) (__a <= 0);
12596 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12597 vclez_s32 (int32x2_t __a)
12599 return (uint32x2_t) (__a <= 0);
12602 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12603 vclez_s64 (int64x1_t __a)
12605 return (uint64x1_t) (__a <= __AARCH64_INT64_C (0));
12608 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12609 vclezq_f32 (float32x4_t __a)
12611 return (uint32x4_t) (__a <= 0.0f);
12614 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12615 vclezq_f64 (float64x2_t __a)
12617 return (uint64x2_t) (__a <= 0.0);
12620 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12621 vclezq_s8 (int8x16_t __a)
12623 return (uint8x16_t) (__a <= 0);
12626 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12627 vclezq_s16 (int16x8_t __a)
12629 return (uint16x8_t) (__a <= 0);
12632 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12633 vclezq_s32 (int32x4_t __a)
12635 return (uint32x4_t) (__a <= 0);
12638 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12639 vclezq_s64 (int64x2_t __a)
12641 return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
12644 /* vclez - scalar. */
12646 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12647 vclezs_f32 (float32_t __a)
12649 return __a <= 0.0f ? -1 : 0;
12652 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12653 vclezd_s64 (int64_t __a)
12655 return __a <= 0 ? -1ll : 0ll;
12658 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12659 vclezd_f64 (float64_t __a)
12661 return __a <= 0.0 ? -1ll : 0ll;
12664 /* vclt - vector. */
12666 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12667 vclt_f32 (float32x2_t __a, float32x2_t __b)
12669 return (uint32x2_t) (__a < __b);
12672 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12673 vclt_f64 (float64x1_t __a, float64x1_t __b)
12675 return (uint64x1_t) (__a < __b);
12678 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12679 vclt_s8 (int8x8_t __a, int8x8_t __b)
12681 return (uint8x8_t) (__a < __b);
12684 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12685 vclt_s16 (int16x4_t __a, int16x4_t __b)
12687 return (uint16x4_t) (__a < __b);
12690 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12691 vclt_s32 (int32x2_t __a, int32x2_t __b)
12693 return (uint32x2_t) (__a < __b);
12696 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12697 vclt_s64 (int64x1_t __a, int64x1_t __b)
12699 return (uint64x1_t) (__a < __b);
12702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12703 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
12705 return (__a < __b);
12708 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12709 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
12711 return (__a < __b);
12714 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12715 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
12717 return (__a < __b);
12720 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12721 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
12723 return (__a < __b);
12726 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12727 vcltq_f32 (float32x4_t __a, float32x4_t __b)
12729 return (uint32x4_t) (__a < __b);
12732 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12733 vcltq_f64 (float64x2_t __a, float64x2_t __b)
12735 return (uint64x2_t) (__a < __b);
12738 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12739 vcltq_s8 (int8x16_t __a, int8x16_t __b)
12741 return (uint8x16_t) (__a < __b);
12744 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12745 vcltq_s16 (int16x8_t __a, int16x8_t __b)
12747 return (uint16x8_t) (__a < __b);
12750 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12751 vcltq_s32 (int32x4_t __a, int32x4_t __b)
12753 return (uint32x4_t) (__a < __b);
12756 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12757 vcltq_s64 (int64x2_t __a, int64x2_t __b)
12759 return (uint64x2_t) (__a < __b);
12762 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12763 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
12765 return (__a < __b);
12768 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12769 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
12771 return (__a < __b);
12774 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12775 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
12777 return (__a < __b);
12780 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12781 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
12783 return (__a < __b);
12786 /* vclt - scalar. */
12788 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12789 vclts_f32 (float32_t __a, float32_t __b)
12791 return __a < __b ? -1 : 0;
12794 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12795 vcltd_s64 (int64_t __a, int64_t __b)
12797 return __a < __b ? -1ll : 0ll;
12800 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12801 vcltd_u64 (uint64_t __a, uint64_t __b)
12803 return __a < __b ? -1ll : 0ll;
12806 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12807 vcltd_f64 (float64_t __a, float64_t __b)
12809 return __a < __b ? -1ll : 0ll;
12812 /* vcltz - vector. */
12814 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12815 vcltz_f32 (float32x2_t __a)
12817 return (uint32x2_t) (__a < 0.0f);
12820 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12821 vcltz_f64 (float64x1_t __a)
12823 return (uint64x1_t) (__a < (float64x1_t) {0.0});
12826 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12827 vcltz_s8 (int8x8_t __a)
12829 return (uint8x8_t) (__a < 0);
12832 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12833 vcltz_s16 (int16x4_t __a)
12835 return (uint16x4_t) (__a < 0);
12838 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12839 vcltz_s32 (int32x2_t __a)
12841 return (uint32x2_t) (__a < 0);
12844 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12845 vcltz_s64 (int64x1_t __a)
12847 return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
12850 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12851 vcltzq_f32 (float32x4_t __a)
12853 return (uint32x4_t) (__a < 0.0f);
12856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12857 vcltzq_f64 (float64x2_t __a)
12859 return (uint64x2_t) (__a < 0.0);
12862 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12863 vcltzq_s8 (int8x16_t __a)
12865 return (uint8x16_t) (__a < 0);
12868 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12869 vcltzq_s16 (int16x8_t __a)
12871 return (uint16x8_t) (__a < 0);
12874 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12875 vcltzq_s32 (int32x4_t __a)
12877 return (uint32x4_t) (__a < 0);
12880 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12881 vcltzq_s64 (int64x2_t __a)
12883 return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
12886 /* vcltz - scalar. */
12888 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12889 vcltzs_f32 (float32_t __a)
12891 return __a < 0.0f ? -1 : 0;
12894 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12895 vcltzd_s64 (int64_t __a)
12897 return __a < 0 ? -1ll : 0ll;
12900 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12901 vcltzd_f64 (float64_t __a)
12903 return __a < 0.0 ? -1ll : 0ll;
12906 /* vcls. */
12908 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12909 vcls_s8 (int8x8_t __a)
12911 return __builtin_aarch64_clrsbv8qi (__a);
12914 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12915 vcls_s16 (int16x4_t __a)
12917 return __builtin_aarch64_clrsbv4hi (__a);
12920 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12921 vcls_s32 (int32x2_t __a)
12923 return __builtin_aarch64_clrsbv2si (__a);
12926 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12927 vclsq_s8 (int8x16_t __a)
12929 return __builtin_aarch64_clrsbv16qi (__a);
12932 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12933 vclsq_s16 (int16x8_t __a)
12935 return __builtin_aarch64_clrsbv8hi (__a);
12938 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12939 vclsq_s32 (int32x4_t __a)
12941 return __builtin_aarch64_clrsbv4si (__a);
12944 /* vclz. */
12946 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12947 vclz_s8 (int8x8_t __a)
12949 return __builtin_aarch64_clzv8qi (__a);
12952 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12953 vclz_s16 (int16x4_t __a)
12955 return __builtin_aarch64_clzv4hi (__a);
12958 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12959 vclz_s32 (int32x2_t __a)
12961 return __builtin_aarch64_clzv2si (__a);
12964 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12965 vclz_u8 (uint8x8_t __a)
12967 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
12970 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12971 vclz_u16 (uint16x4_t __a)
12973 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
12976 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12977 vclz_u32 (uint32x2_t __a)
12979 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
12982 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12983 vclzq_s8 (int8x16_t __a)
12985 return __builtin_aarch64_clzv16qi (__a);
12988 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12989 vclzq_s16 (int16x8_t __a)
12991 return __builtin_aarch64_clzv8hi (__a);
12994 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12995 vclzq_s32 (int32x4_t __a)
12997 return __builtin_aarch64_clzv4si (__a);
13000 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13001 vclzq_u8 (uint8x16_t __a)
13003 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
13006 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13007 vclzq_u16 (uint16x8_t __a)
13009 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
13012 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13013 vclzq_u32 (uint32x4_t __a)
13015 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
13018 /* vcnt. */
13020 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13021 vcnt_p8 (poly8x8_t __a)
13023 return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
13026 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13027 vcnt_s8 (int8x8_t __a)
13029 return __builtin_aarch64_popcountv8qi (__a);
13032 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13033 vcnt_u8 (uint8x8_t __a)
13035 return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
13038 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13039 vcntq_p8 (poly8x16_t __a)
13041 return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
13044 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13045 vcntq_s8 (int8x16_t __a)
13047 return __builtin_aarch64_popcountv16qi (__a);
13050 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13051 vcntq_u8 (uint8x16_t __a)
13053 return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
13056 /* vcvt (double -> float). */
13058 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
13059 vcvt_f16_f32 (float32x4_t __a)
13061 return __builtin_aarch64_float_truncate_lo_v4hf (__a);
13064 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
13065 vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
13067 return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
13070 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13071 vcvt_f32_f64 (float64x2_t __a)
13073 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
13076 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13077 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
13079 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
13082 /* vcvt (float -> double). */
13084 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13085 vcvt_f32_f16 (float16x4_t __a)
13087 return __builtin_aarch64_float_extend_lo_v4sf (__a);
13090 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13091 vcvt_f64_f32 (float32x2_t __a)
13094 return __builtin_aarch64_float_extend_lo_v2df (__a);
13097 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13098 vcvt_high_f32_f16 (float16x8_t __a)
13100 return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
13103 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13104 vcvt_high_f64_f32 (float32x4_t __a)
13106 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
13109 /* vcvt (<u>int -> float) */
13111 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13112 vcvtd_f64_s64 (int64_t __a)
13114 return (float64_t) __a;
13117 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13118 vcvtd_f64_u64 (uint64_t __a)
13120 return (float64_t) __a;
13123 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13124 vcvts_f32_s32 (int32_t __a)
13126 return (float32_t) __a;
13129 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13130 vcvts_f32_u32 (uint32_t __a)
13132 return (float32_t) __a;
13135 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13136 vcvt_f32_s32 (int32x2_t __a)
13138 return __builtin_aarch64_floatv2siv2sf (__a);
13141 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13142 vcvt_f32_u32 (uint32x2_t __a)
13144 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
13147 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13148 vcvtq_f32_s32 (int32x4_t __a)
13150 return __builtin_aarch64_floatv4siv4sf (__a);
13153 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13154 vcvtq_f32_u32 (uint32x4_t __a)
13156 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
13159 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13160 vcvtq_f64_s64 (int64x2_t __a)
13162 return __builtin_aarch64_floatv2div2df (__a);
13165 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13166 vcvtq_f64_u64 (uint64x2_t __a)
13168 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
13171 /* vcvt (float -> <u>int) */
13173 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13174 vcvtd_s64_f64 (float64_t __a)
13176 return (int64_t) __a;
13179 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13180 vcvtd_u64_f64 (float64_t __a)
13182 return (uint64_t) __a;
13185 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13186 vcvts_s32_f32 (float32_t __a)
13188 return (int32_t) __a;
13191 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13192 vcvts_u32_f32 (float32_t __a)
13194 return (uint32_t) __a;
13197 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13198 vcvt_s32_f32 (float32x2_t __a)
13200 return __builtin_aarch64_lbtruncv2sfv2si (__a);
13203 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13204 vcvt_u32_f32 (float32x2_t __a)
13206 return __builtin_aarch64_lbtruncuv2sfv2si_us (__a);
13209 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13210 vcvtq_s32_f32 (float32x4_t __a)
13212 return __builtin_aarch64_lbtruncv4sfv4si (__a);
13215 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13216 vcvtq_u32_f32 (float32x4_t __a)
13218 return __builtin_aarch64_lbtruncuv4sfv4si_us (__a);
13221 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13222 vcvt_s64_f64 (float64x1_t __a)
13224 return (int64x1_t) {vcvtd_s64_f64 (__a[0])};
13227 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13228 vcvt_u64_f64 (float64x1_t __a)
13230 return (uint64x1_t) {vcvtd_u64_f64 (__a[0])};
13233 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13234 vcvtq_s64_f64 (float64x2_t __a)
13236 return __builtin_aarch64_lbtruncv2dfv2di (__a);
13239 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13240 vcvtq_u64_f64 (float64x2_t __a)
13242 return __builtin_aarch64_lbtruncuv2dfv2di_us (__a);
13245 /* vcvta */
13247 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13248 vcvtad_s64_f64 (float64_t __a)
13250 return __builtin_aarch64_lrounddfdi (__a);
13253 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13254 vcvtad_u64_f64 (float64_t __a)
13256 return __builtin_aarch64_lroundudfdi_us (__a);
13259 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13260 vcvtas_s32_f32 (float32_t __a)
13262 return __builtin_aarch64_lroundsfsi (__a);
13265 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13266 vcvtas_u32_f32 (float32_t __a)
13268 return __builtin_aarch64_lroundusfsi_us (__a);
13271 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13272 vcvta_s32_f32 (float32x2_t __a)
13274 return __builtin_aarch64_lroundv2sfv2si (__a);
13277 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13278 vcvta_u32_f32 (float32x2_t __a)
13280 return __builtin_aarch64_lrounduv2sfv2si_us (__a);
13283 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13284 vcvtaq_s32_f32 (float32x4_t __a)
13286 return __builtin_aarch64_lroundv4sfv4si (__a);
13289 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13290 vcvtaq_u32_f32 (float32x4_t __a)
13292 return __builtin_aarch64_lrounduv4sfv4si_us (__a);
13295 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13296 vcvta_s64_f64 (float64x1_t __a)
13298 return (int64x1_t) {vcvtad_s64_f64 (__a[0])};
13301 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13302 vcvta_u64_f64 (float64x1_t __a)
13304 return (uint64x1_t) {vcvtad_u64_f64 (__a[0])};
13307 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13308 vcvtaq_s64_f64 (float64x2_t __a)
13310 return __builtin_aarch64_lroundv2dfv2di (__a);
13313 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13314 vcvtaq_u64_f64 (float64x2_t __a)
13316 return __builtin_aarch64_lrounduv2dfv2di_us (__a);
13319 /* vcvtm */
13321 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13322 vcvtmd_s64_f64 (float64_t __a)
13324 return __builtin_llfloor (__a);
13327 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13328 vcvtmd_u64_f64 (float64_t __a)
13330 return __builtin_aarch64_lfloorudfdi_us (__a);
13333 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13334 vcvtms_s32_f32 (float32_t __a)
13336 return __builtin_ifloorf (__a);
13339 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13340 vcvtms_u32_f32 (float32_t __a)
13342 return __builtin_aarch64_lfloorusfsi_us (__a);
13345 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13346 vcvtm_s32_f32 (float32x2_t __a)
13348 return __builtin_aarch64_lfloorv2sfv2si (__a);
13351 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13352 vcvtm_u32_f32 (float32x2_t __a)
13354 return __builtin_aarch64_lflooruv2sfv2si_us (__a);
13357 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13358 vcvtmq_s32_f32 (float32x4_t __a)
13360 return __builtin_aarch64_lfloorv4sfv4si (__a);
13363 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13364 vcvtmq_u32_f32 (float32x4_t __a)
13366 return __builtin_aarch64_lflooruv4sfv4si_us (__a);
13369 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13370 vcvtm_s64_f64 (float64x1_t __a)
13372 return (int64x1_t) {vcvtmd_s64_f64 (__a[0])};
13375 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13376 vcvtm_u64_f64 (float64x1_t __a)
13378 return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])};
13381 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13382 vcvtmq_s64_f64 (float64x2_t __a)
13384 return __builtin_aarch64_lfloorv2dfv2di (__a);
13387 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13388 vcvtmq_u64_f64 (float64x2_t __a)
13390 return __builtin_aarch64_lflooruv2dfv2di_us (__a);
13393 /* vcvtn */
13395 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13396 vcvtnd_s64_f64 (float64_t __a)
13398 return __builtin_aarch64_lfrintndfdi (__a);
13401 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13402 vcvtnd_u64_f64 (float64_t __a)
13404 return __builtin_aarch64_lfrintnudfdi_us (__a);
13407 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13408 vcvtns_s32_f32 (float32_t __a)
13410 return __builtin_aarch64_lfrintnsfsi (__a);
13413 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13414 vcvtns_u32_f32 (float32_t __a)
13416 return __builtin_aarch64_lfrintnusfsi_us (__a);
13419 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13420 vcvtn_s32_f32 (float32x2_t __a)
13422 return __builtin_aarch64_lfrintnv2sfv2si (__a);
13425 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13426 vcvtn_u32_f32 (float32x2_t __a)
13428 return __builtin_aarch64_lfrintnuv2sfv2si_us (__a);
13431 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13432 vcvtnq_s32_f32 (float32x4_t __a)
13434 return __builtin_aarch64_lfrintnv4sfv4si (__a);
13437 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13438 vcvtnq_u32_f32 (float32x4_t __a)
13440 return __builtin_aarch64_lfrintnuv4sfv4si_us (__a);
13443 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13444 vcvtn_s64_f64 (float64x1_t __a)
13446 return (int64x1_t) {vcvtnd_s64_f64 (__a[0])};
13449 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13450 vcvtn_u64_f64 (float64x1_t __a)
13452 return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])};
13455 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13456 vcvtnq_s64_f64 (float64x2_t __a)
13458 return __builtin_aarch64_lfrintnv2dfv2di (__a);
13461 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13462 vcvtnq_u64_f64 (float64x2_t __a)
13464 return __builtin_aarch64_lfrintnuv2dfv2di_us (__a);
13467 /* vcvtp */
13469 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13470 vcvtpd_s64_f64 (float64_t __a)
13472 return __builtin_llceil (__a);
13475 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13476 vcvtpd_u64_f64 (float64_t __a)
13478 return __builtin_aarch64_lceiludfdi_us (__a);
13481 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13482 vcvtps_s32_f32 (float32_t __a)
13484 return __builtin_iceilf (__a);
13487 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13488 vcvtps_u32_f32 (float32_t __a)
13490 return __builtin_aarch64_lceilusfsi_us (__a);
13493 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13494 vcvtp_s32_f32 (float32x2_t __a)
13496 return __builtin_aarch64_lceilv2sfv2si (__a);
13499 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13500 vcvtp_u32_f32 (float32x2_t __a)
13502 return __builtin_aarch64_lceiluv2sfv2si_us (__a);
13505 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13506 vcvtpq_s32_f32 (float32x4_t __a)
13508 return __builtin_aarch64_lceilv4sfv4si (__a);
13511 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13512 vcvtpq_u32_f32 (float32x4_t __a)
13514 return __builtin_aarch64_lceiluv4sfv4si_us (__a);
13517 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13518 vcvtp_s64_f64 (float64x1_t __a)
13520 return (int64x1_t) {vcvtpd_s64_f64 (__a[0])};
13523 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13524 vcvtp_u64_f64 (float64x1_t __a)
13526 return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])};
13529 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13530 vcvtpq_s64_f64 (float64x2_t __a)
13532 return __builtin_aarch64_lceilv2dfv2di (__a);
13535 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13536 vcvtpq_u64_f64 (float64x2_t __a)
13538 return __builtin_aarch64_lceiluv2dfv2di_us (__a);
13541 /* vdup_n */
13543 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13544 vdup_n_f32 (float32_t __a)
13546 return (float32x2_t) {__a, __a};
13549 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13550 vdup_n_f64 (float64_t __a)
13552 return (float64x1_t) {__a};
13555 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13556 vdup_n_p8 (poly8_t __a)
13558 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13561 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13562 vdup_n_p16 (poly16_t __a)
13564 return (poly16x4_t) {__a, __a, __a, __a};
13567 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13568 vdup_n_s8 (int8_t __a)
13570 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13573 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13574 vdup_n_s16 (int16_t __a)
13576 return (int16x4_t) {__a, __a, __a, __a};
13579 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13580 vdup_n_s32 (int32_t __a)
13582 return (int32x2_t) {__a, __a};
13585 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13586 vdup_n_s64 (int64_t __a)
13588 return (int64x1_t) {__a};
13591 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13592 vdup_n_u8 (uint8_t __a)
13594 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13597 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13598 vdup_n_u16 (uint16_t __a)
13600 return (uint16x4_t) {__a, __a, __a, __a};
13603 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13604 vdup_n_u32 (uint32_t __a)
13606 return (uint32x2_t) {__a, __a};
13609 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13610 vdup_n_u64 (uint64_t __a)
13612 return (uint64x1_t) {__a};
13615 /* vdupq_n */
13617 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13618 vdupq_n_f32 (float32_t __a)
13620 return (float32x4_t) {__a, __a, __a, __a};
13623 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13624 vdupq_n_f64 (float64_t __a)
13626 return (float64x2_t) {__a, __a};
13629 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13630 vdupq_n_p8 (uint32_t __a)
13632 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13633 __a, __a, __a, __a, __a, __a, __a, __a};
13636 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13637 vdupq_n_p16 (uint32_t __a)
13639 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13642 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13643 vdupq_n_s8 (int32_t __a)
13645 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13646 __a, __a, __a, __a, __a, __a, __a, __a};
13649 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13650 vdupq_n_s16 (int32_t __a)
13652 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13655 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13656 vdupq_n_s32 (int32_t __a)
13658 return (int32x4_t) {__a, __a, __a, __a};
13661 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13662 vdupq_n_s64 (int64_t __a)
13664 return (int64x2_t) {__a, __a};
13667 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13668 vdupq_n_u8 (uint32_t __a)
13670 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13671 __a, __a, __a, __a, __a, __a, __a, __a};
13674 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13675 vdupq_n_u16 (uint32_t __a)
13677 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13680 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13681 vdupq_n_u32 (uint32_t __a)
13683 return (uint32x4_t) {__a, __a, __a, __a};
13686 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13687 vdupq_n_u64 (uint64_t __a)
13689 return (uint64x2_t) {__a, __a};
13692 /* vdup_lane */
13694 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13695 vdup_lane_f32 (float32x2_t __a, const int __b)
13697 return __aarch64_vdup_lane_f32 (__a, __b);
13700 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13701 vdup_lane_f64 (float64x1_t __a, const int __b)
13703 return __aarch64_vdup_lane_f64 (__a, __b);
13706 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13707 vdup_lane_p8 (poly8x8_t __a, const int __b)
13709 return __aarch64_vdup_lane_p8 (__a, __b);
13712 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13713 vdup_lane_p16 (poly16x4_t __a, const int __b)
13715 return __aarch64_vdup_lane_p16 (__a, __b);
13718 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13719 vdup_lane_s8 (int8x8_t __a, const int __b)
13721 return __aarch64_vdup_lane_s8 (__a, __b);
13724 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13725 vdup_lane_s16 (int16x4_t __a, const int __b)
13727 return __aarch64_vdup_lane_s16 (__a, __b);
13730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13731 vdup_lane_s32 (int32x2_t __a, const int __b)
13733 return __aarch64_vdup_lane_s32 (__a, __b);
13736 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13737 vdup_lane_s64 (int64x1_t __a, const int __b)
13739 return __aarch64_vdup_lane_s64 (__a, __b);
13742 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13743 vdup_lane_u8 (uint8x8_t __a, const int __b)
13745 return __aarch64_vdup_lane_u8 (__a, __b);
13748 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13749 vdup_lane_u16 (uint16x4_t __a, const int __b)
13751 return __aarch64_vdup_lane_u16 (__a, __b);
13754 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13755 vdup_lane_u32 (uint32x2_t __a, const int __b)
13757 return __aarch64_vdup_lane_u32 (__a, __b);
13760 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13761 vdup_lane_u64 (uint64x1_t __a, const int __b)
13763 return __aarch64_vdup_lane_u64 (__a, __b);
13766 /* vdup_laneq */
13768 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13769 vdup_laneq_f32 (float32x4_t __a, const int __b)
13771 return __aarch64_vdup_laneq_f32 (__a, __b);
13774 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13775 vdup_laneq_f64 (float64x2_t __a, const int __b)
13777 return __aarch64_vdup_laneq_f64 (__a, __b);
13780 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13781 vdup_laneq_p8 (poly8x16_t __a, const int __b)
13783 return __aarch64_vdup_laneq_p8 (__a, __b);
13786 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13787 vdup_laneq_p16 (poly16x8_t __a, const int __b)
13789 return __aarch64_vdup_laneq_p16 (__a, __b);
13792 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13793 vdup_laneq_s8 (int8x16_t __a, const int __b)
13795 return __aarch64_vdup_laneq_s8 (__a, __b);
13798 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13799 vdup_laneq_s16 (int16x8_t __a, const int __b)
13801 return __aarch64_vdup_laneq_s16 (__a, __b);
13804 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13805 vdup_laneq_s32 (int32x4_t __a, const int __b)
13807 return __aarch64_vdup_laneq_s32 (__a, __b);
13810 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13811 vdup_laneq_s64 (int64x2_t __a, const int __b)
13813 return __aarch64_vdup_laneq_s64 (__a, __b);
13816 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13817 vdup_laneq_u8 (uint8x16_t __a, const int __b)
13819 return __aarch64_vdup_laneq_u8 (__a, __b);
13822 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13823 vdup_laneq_u16 (uint16x8_t __a, const int __b)
13825 return __aarch64_vdup_laneq_u16 (__a, __b);
13828 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13829 vdup_laneq_u32 (uint32x4_t __a, const int __b)
13831 return __aarch64_vdup_laneq_u32 (__a, __b);
13834 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13835 vdup_laneq_u64 (uint64x2_t __a, const int __b)
13837 return __aarch64_vdup_laneq_u64 (__a, __b);
13840 /* vdupq_lane */
13841 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13842 vdupq_lane_f32 (float32x2_t __a, const int __b)
13844 return __aarch64_vdupq_lane_f32 (__a, __b);
13847 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13848 vdupq_lane_f64 (float64x1_t __a, const int __b)
13850 return __aarch64_vdupq_lane_f64 (__a, __b);
13853 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13854 vdupq_lane_p8 (poly8x8_t __a, const int __b)
13856 return __aarch64_vdupq_lane_p8 (__a, __b);
13859 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13860 vdupq_lane_p16 (poly16x4_t __a, const int __b)
13862 return __aarch64_vdupq_lane_p16 (__a, __b);
13865 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13866 vdupq_lane_s8 (int8x8_t __a, const int __b)
13868 return __aarch64_vdupq_lane_s8 (__a, __b);
13871 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13872 vdupq_lane_s16 (int16x4_t __a, const int __b)
13874 return __aarch64_vdupq_lane_s16 (__a, __b);
13877 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13878 vdupq_lane_s32 (int32x2_t __a, const int __b)
13880 return __aarch64_vdupq_lane_s32 (__a, __b);
13883 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13884 vdupq_lane_s64 (int64x1_t __a, const int __b)
13886 return __aarch64_vdupq_lane_s64 (__a, __b);
13889 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13890 vdupq_lane_u8 (uint8x8_t __a, const int __b)
13892 return __aarch64_vdupq_lane_u8 (__a, __b);
13895 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13896 vdupq_lane_u16 (uint16x4_t __a, const int __b)
13898 return __aarch64_vdupq_lane_u16 (__a, __b);
13901 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13902 vdupq_lane_u32 (uint32x2_t __a, const int __b)
13904 return __aarch64_vdupq_lane_u32 (__a, __b);
13907 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13908 vdupq_lane_u64 (uint64x1_t __a, const int __b)
13910 return __aarch64_vdupq_lane_u64 (__a, __b);
13913 /* vdupq_laneq */
13914 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13915 vdupq_laneq_f32 (float32x4_t __a, const int __b)
13917 return __aarch64_vdupq_laneq_f32 (__a, __b);
13920 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13921 vdupq_laneq_f64 (float64x2_t __a, const int __b)
13923 return __aarch64_vdupq_laneq_f64 (__a, __b);
13926 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13927 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
13929 return __aarch64_vdupq_laneq_p8 (__a, __b);
13932 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13933 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
13935 return __aarch64_vdupq_laneq_p16 (__a, __b);
13938 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13939 vdupq_laneq_s8 (int8x16_t __a, const int __b)
13941 return __aarch64_vdupq_laneq_s8 (__a, __b);
13944 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13945 vdupq_laneq_s16 (int16x8_t __a, const int __b)
13947 return __aarch64_vdupq_laneq_s16 (__a, __b);
13950 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13951 vdupq_laneq_s32 (int32x4_t __a, const int __b)
13953 return __aarch64_vdupq_laneq_s32 (__a, __b);
13956 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13957 vdupq_laneq_s64 (int64x2_t __a, const int __b)
13959 return __aarch64_vdupq_laneq_s64 (__a, __b);
13962 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13963 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
13965 return __aarch64_vdupq_laneq_u8 (__a, __b);
13968 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13969 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
13971 return __aarch64_vdupq_laneq_u16 (__a, __b);
13974 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13975 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
13977 return __aarch64_vdupq_laneq_u32 (__a, __b);
13980 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13981 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
13983 return __aarch64_vdupq_laneq_u64 (__a, __b);
13986 /* vdupb_lane */
13987 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
13988 vdupb_lane_p8 (poly8x8_t __a, const int __b)
13990 return __aarch64_vget_lane_any (__a, __b);
13993 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13994 vdupb_lane_s8 (int8x8_t __a, const int __b)
13996 return __aarch64_vget_lane_any (__a, __b);
13999 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14000 vdupb_lane_u8 (uint8x8_t __a, const int __b)
14002 return __aarch64_vget_lane_any (__a, __b);
14005 /* vduph_lane */
14006 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14007 vduph_lane_p16 (poly16x4_t __a, const int __b)
14009 return __aarch64_vget_lane_any (__a, __b);
14012 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14013 vduph_lane_s16 (int16x4_t __a, const int __b)
14015 return __aarch64_vget_lane_any (__a, __b);
14018 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14019 vduph_lane_u16 (uint16x4_t __a, const int __b)
14021 return __aarch64_vget_lane_any (__a, __b);
14024 /* vdups_lane */
14025 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14026 vdups_lane_f32 (float32x2_t __a, const int __b)
14028 return __aarch64_vget_lane_any (__a, __b);
14031 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14032 vdups_lane_s32 (int32x2_t __a, const int __b)
14034 return __aarch64_vget_lane_any (__a, __b);
14037 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14038 vdups_lane_u32 (uint32x2_t __a, const int __b)
14040 return __aarch64_vget_lane_any (__a, __b);
14043 /* vdupd_lane */
14044 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14045 vdupd_lane_f64 (float64x1_t __a, const int __b)
14047 __AARCH64_LANE_CHECK (__a, __b);
14048 return __a[0];
14051 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14052 vdupd_lane_s64 (int64x1_t __a, const int __b)
14054 __AARCH64_LANE_CHECK (__a, __b);
14055 return __a[0];
14058 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14059 vdupd_lane_u64 (uint64x1_t __a, const int __b)
14061 __AARCH64_LANE_CHECK (__a, __b);
14062 return __a[0];
14065 /* vdupb_laneq */
14066 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
14067 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
14069 return __aarch64_vget_lane_any (__a, __b);
14072 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
14073 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
14075 return __aarch64_vget_lane_any (__a, __b);
14078 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14079 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
14081 return __aarch64_vget_lane_any (__a, __b);
14084 /* vduph_laneq */
14085 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14086 vduph_laneq_p16 (poly16x8_t __a, const int __b)
14088 return __aarch64_vget_lane_any (__a, __b);
14091 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14092 vduph_laneq_s16 (int16x8_t __a, const int __b)
14094 return __aarch64_vget_lane_any (__a, __b);
14097 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14098 vduph_laneq_u16 (uint16x8_t __a, const int __b)
14100 return __aarch64_vget_lane_any (__a, __b);
14103 /* vdups_laneq */
14104 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14105 vdups_laneq_f32 (float32x4_t __a, const int __b)
14107 return __aarch64_vget_lane_any (__a, __b);
14110 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14111 vdups_laneq_s32 (int32x4_t __a, const int __b)
14113 return __aarch64_vget_lane_any (__a, __b);
14116 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14117 vdups_laneq_u32 (uint32x4_t __a, const int __b)
14119 return __aarch64_vget_lane_any (__a, __b);
14122 /* vdupd_laneq */
14123 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14124 vdupd_laneq_f64 (float64x2_t __a, const int __b)
14126 return __aarch64_vget_lane_any (__a, __b);
14129 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14130 vdupd_laneq_s64 (int64x2_t __a, const int __b)
14132 return __aarch64_vget_lane_any (__a, __b);
14135 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14136 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
14138 return __aarch64_vget_lane_any (__a, __b);
14141 /* vext */
14143 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14144 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
14146 __AARCH64_LANE_CHECK (__a, __c);
14147 #ifdef __AARCH64EB__
14148 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14149 #else
14150 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14151 #endif
14154 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14155 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
14157 __AARCH64_LANE_CHECK (__a, __c);
14158 /* The only possible index to the assembler instruction returns element 0. */
14159 return __a;
14161 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14162 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
14164 __AARCH64_LANE_CHECK (__a, __c);
14165 #ifdef __AARCH64EB__
14166 return __builtin_shuffle (__b, __a, (uint8x8_t)
14167 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14168 #else
14169 return __builtin_shuffle (__a, __b,
14170 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14171 #endif
14174 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14175 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
14177 __AARCH64_LANE_CHECK (__a, __c);
14178 #ifdef __AARCH64EB__
14179 return __builtin_shuffle (__b, __a,
14180 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14181 #else
14182 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14183 #endif
14186 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14187 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
14189 __AARCH64_LANE_CHECK (__a, __c);
14190 #ifdef __AARCH64EB__
14191 return __builtin_shuffle (__b, __a, (uint8x8_t)
14192 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14193 #else
14194 return __builtin_shuffle (__a, __b,
14195 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14196 #endif
14199 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14200 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
14202 __AARCH64_LANE_CHECK (__a, __c);
14203 #ifdef __AARCH64EB__
14204 return __builtin_shuffle (__b, __a,
14205 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14206 #else
14207 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14208 #endif
14211 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14212 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
14214 __AARCH64_LANE_CHECK (__a, __c);
14215 #ifdef __AARCH64EB__
14216 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14217 #else
14218 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14219 #endif
14222 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14223 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
14225 __AARCH64_LANE_CHECK (__a, __c);
14226 /* The only possible index to the assembler instruction returns element 0. */
14227 return __a;
14230 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14231 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
14233 __AARCH64_LANE_CHECK (__a, __c);
14234 #ifdef __AARCH64EB__
14235 return __builtin_shuffle (__b, __a, (uint8x8_t)
14236 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14237 #else
14238 return __builtin_shuffle (__a, __b,
14239 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14240 #endif
14243 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14244 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
14246 __AARCH64_LANE_CHECK (__a, __c);
14247 #ifdef __AARCH64EB__
14248 return __builtin_shuffle (__b, __a,
14249 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14250 #else
14251 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14252 #endif
14255 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14256 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
14258 __AARCH64_LANE_CHECK (__a, __c);
14259 #ifdef __AARCH64EB__
14260 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14261 #else
14262 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14263 #endif
14266 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14267 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
14269 __AARCH64_LANE_CHECK (__a, __c);
14270 /* The only possible index to the assembler instruction returns element 0. */
14271 return __a;
14274 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14275 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
14277 __AARCH64_LANE_CHECK (__a, __c);
14278 #ifdef __AARCH64EB__
14279 return __builtin_shuffle (__b, __a,
14280 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14281 #else
14282 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14283 #endif
14286 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14287 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
14289 __AARCH64_LANE_CHECK (__a, __c);
14290 #ifdef __AARCH64EB__
14291 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14292 #else
14293 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14294 #endif
14297 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14298 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
14300 __AARCH64_LANE_CHECK (__a, __c);
14301 #ifdef __AARCH64EB__
14302 return __builtin_shuffle (__b, __a, (uint8x16_t)
14303 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14304 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14305 #else
14306 return __builtin_shuffle (__a, __b, (uint8x16_t)
14307 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14308 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14309 #endif
14312 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14313 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
14315 __AARCH64_LANE_CHECK (__a, __c);
14316 #ifdef __AARCH64EB__
14317 return __builtin_shuffle (__b, __a, (uint16x8_t)
14318 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14319 #else
14320 return __builtin_shuffle (__a, __b,
14321 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14322 #endif
14325 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14326 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
14328 __AARCH64_LANE_CHECK (__a, __c);
14329 #ifdef __AARCH64EB__
14330 return __builtin_shuffle (__b, __a, (uint8x16_t)
14331 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14332 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14333 #else
14334 return __builtin_shuffle (__a, __b, (uint8x16_t)
14335 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14336 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14337 #endif
14340 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14341 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
14343 __AARCH64_LANE_CHECK (__a, __c);
14344 #ifdef __AARCH64EB__
14345 return __builtin_shuffle (__b, __a, (uint16x8_t)
14346 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14347 #else
14348 return __builtin_shuffle (__a, __b,
14349 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14350 #endif
14353 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14354 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
14356 __AARCH64_LANE_CHECK (__a, __c);
14357 #ifdef __AARCH64EB__
14358 return __builtin_shuffle (__b, __a,
14359 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14360 #else
14361 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14362 #endif
14365 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14366 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
14368 __AARCH64_LANE_CHECK (__a, __c);
14369 #ifdef __AARCH64EB__
14370 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14371 #else
14372 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14373 #endif
14376 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14377 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
14379 __AARCH64_LANE_CHECK (__a, __c);
14380 #ifdef __AARCH64EB__
14381 return __builtin_shuffle (__b, __a, (uint8x16_t)
14382 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14383 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14384 #else
14385 return __builtin_shuffle (__a, __b, (uint8x16_t)
14386 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14387 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14388 #endif
14391 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14392 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
14394 __AARCH64_LANE_CHECK (__a, __c);
14395 #ifdef __AARCH64EB__
14396 return __builtin_shuffle (__b, __a, (uint16x8_t)
14397 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14398 #else
14399 return __builtin_shuffle (__a, __b,
14400 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14401 #endif
14404 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14405 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
14407 __AARCH64_LANE_CHECK (__a, __c);
14408 #ifdef __AARCH64EB__
14409 return __builtin_shuffle (__b, __a,
14410 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14411 #else
14412 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14413 #endif
14416 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14417 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
14419 __AARCH64_LANE_CHECK (__a, __c);
14420 #ifdef __AARCH64EB__
14421 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14422 #else
14423 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14424 #endif
14427 /* vfma */
14429 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14430 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
14432 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
14435 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14436 vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
14438 return __builtin_aarch64_fmav2sf (__b, __c, __a);
14441 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14442 vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
14444 return __builtin_aarch64_fmav4sf (__b, __c, __a);
14447 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14448 vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
14450 return __builtin_aarch64_fmav2df (__b, __c, __a);
14453 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14454 vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
14456 return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
14459 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14460 vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
14462 return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a);
14465 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14466 vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
14468 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
14471 /* vfma_lane */
14473 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14474 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
14475 float32x2_t __c, const int __lane)
14477 return __builtin_aarch64_fmav2sf (__b,
14478 __aarch64_vdup_lane_f32 (__c, __lane),
14479 __a);
14482 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14483 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
14484 float64x1_t __c, const int __lane)
14486 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
14489 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14490 vfmad_lane_f64 (float64_t __a, float64_t __b,
14491 float64x1_t __c, const int __lane)
14493 return __builtin_fma (__b, __c[0], __a);
14496 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14497 vfmas_lane_f32 (float32_t __a, float32_t __b,
14498 float32x2_t __c, const int __lane)
14500 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14503 /* vfma_laneq */
14505 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14506 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
14507 float32x4_t __c, const int __lane)
14509 return __builtin_aarch64_fmav2sf (__b,
14510 __aarch64_vdup_laneq_f32 (__c, __lane),
14511 __a);
14514 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14515 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
14516 float64x2_t __c, const int __lane)
14518 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
14519 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
14522 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14523 vfmad_laneq_f64 (float64_t __a, float64_t __b,
14524 float64x2_t __c, const int __lane)
14526 return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14529 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14530 vfmas_laneq_f32 (float32_t __a, float32_t __b,
14531 float32x4_t __c, const int __lane)
14533 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14536 /* vfmaq_lane */
14538 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14539 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
14540 float32x2_t __c, const int __lane)
14542 return __builtin_aarch64_fmav4sf (__b,
14543 __aarch64_vdupq_lane_f32 (__c, __lane),
14544 __a);
14547 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14548 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
14549 float64x1_t __c, const int __lane)
14551 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
14554 /* vfmaq_laneq */
14556 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14557 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
14558 float32x4_t __c, const int __lane)
14560 return __builtin_aarch64_fmav4sf (__b,
14561 __aarch64_vdupq_laneq_f32 (__c, __lane),
14562 __a);
14565 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14566 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
14567 float64x2_t __c, const int __lane)
14569 return __builtin_aarch64_fmav2df (__b,
14570 __aarch64_vdupq_laneq_f64 (__c, __lane),
14571 __a);
14574 /* vfms */
14576 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14577 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
14579 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
14582 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14583 vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
14585 return __builtin_aarch64_fmav2sf (-__b, __c, __a);
14588 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14589 vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
14591 return __builtin_aarch64_fmav4sf (-__b, __c, __a);
14594 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14595 vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
14597 return __builtin_aarch64_fmav2df (-__b, __c, __a);
14601 /* vfms_lane */
14603 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14604 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
14605 float32x2_t __c, const int __lane)
14607 return __builtin_aarch64_fmav2sf (-__b,
14608 __aarch64_vdup_lane_f32 (__c, __lane),
14609 __a);
14612 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14613 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
14614 float64x1_t __c, const int __lane)
14616 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
14619 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14620 vfmsd_lane_f64 (float64_t __a, float64_t __b,
14621 float64x1_t __c, const int __lane)
14623 return __builtin_fma (-__b, __c[0], __a);
14626 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14627 vfmss_lane_f32 (float32_t __a, float32_t __b,
14628 float32x2_t __c, const int __lane)
14630 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14633 /* vfms_laneq */
14635 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14636 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
14637 float32x4_t __c, const int __lane)
14639 return __builtin_aarch64_fmav2sf (-__b,
14640 __aarch64_vdup_laneq_f32 (__c, __lane),
14641 __a);
14644 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14645 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
14646 float64x2_t __c, const int __lane)
14648 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
14649 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
14652 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14653 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
14654 float64x2_t __c, const int __lane)
14656 return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14659 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14660 vfmss_laneq_f32 (float32_t __a, float32_t __b,
14661 float32x4_t __c, const int __lane)
14663 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14666 /* vfmsq_lane */
14668 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14669 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
14670 float32x2_t __c, const int __lane)
14672 return __builtin_aarch64_fmav4sf (-__b,
14673 __aarch64_vdupq_lane_f32 (__c, __lane),
14674 __a);
14677 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14678 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
14679 float64x1_t __c, const int __lane)
14681 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
14684 /* vfmsq_laneq */
14686 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14687 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
14688 float32x4_t __c, const int __lane)
14690 return __builtin_aarch64_fmav4sf (-__b,
14691 __aarch64_vdupq_laneq_f32 (__c, __lane),
14692 __a);
14695 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14696 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
14697 float64x2_t __c, const int __lane)
14699 return __builtin_aarch64_fmav2df (-__b,
14700 __aarch64_vdupq_laneq_f64 (__c, __lane),
14701 __a);
14704 /* vld1 */
14706 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14707 vld1_f16 (const float16_t *__a)
14709 return __builtin_aarch64_ld1v4hf (__a);
14712 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14713 vld1_f32 (const float32_t *a)
14715 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
14718 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14719 vld1_f64 (const float64_t *a)
14721 return (float64x1_t) {*a};
14724 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14725 vld1_p8 (const poly8_t *a)
14727 return (poly8x8_t)
14728 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14731 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14732 vld1_p16 (const poly16_t *a)
14734 return (poly16x4_t)
14735 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14738 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14739 vld1_s8 (const int8_t *a)
14741 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14744 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14745 vld1_s16 (const int16_t *a)
14747 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14750 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14751 vld1_s32 (const int32_t *a)
14753 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
14756 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14757 vld1_s64 (const int64_t *a)
14759 return (int64x1_t) {*a};
14762 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14763 vld1_u8 (const uint8_t *a)
14765 return (uint8x8_t)
14766 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14769 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14770 vld1_u16 (const uint16_t *a)
14772 return (uint16x4_t)
14773 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14776 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14777 vld1_u32 (const uint32_t *a)
14779 return (uint32x2_t)
14780 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
14783 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14784 vld1_u64 (const uint64_t *a)
14786 return (uint64x1_t) {*a};
14789 /* vld1q */
14791 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14792 vld1q_f16 (const float16_t *__a)
14794 return __builtin_aarch64_ld1v8hf (__a);
14797 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14798 vld1q_f32 (const float32_t *a)
14800 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
14803 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14804 vld1q_f64 (const float64_t *a)
14806 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
14809 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14810 vld1q_p8 (const poly8_t *a)
14812 return (poly8x16_t)
14813 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14816 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14817 vld1q_p16 (const poly16_t *a)
14819 return (poly16x8_t)
14820 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14823 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14824 vld1q_s8 (const int8_t *a)
14826 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14829 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14830 vld1q_s16 (const int16_t *a)
14832 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14835 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14836 vld1q_s32 (const int32_t *a)
14838 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
14841 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14842 vld1q_s64 (const int64_t *a)
14844 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
14847 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14848 vld1q_u8 (const uint8_t *a)
14850 return (uint8x16_t)
14851 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14855 vld1q_u16 (const uint16_t *a)
14857 return (uint16x8_t)
14858 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14861 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14862 vld1q_u32 (const uint32_t *a)
14864 return (uint32x4_t)
14865 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
14868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14869 vld1q_u64 (const uint64_t *a)
14871 return (uint64x2_t)
14872 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
14875 /* vld1_dup */
14877 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14878 vld1_dup_f16 (const float16_t* __a)
14880 float16_t __f = *__a;
14881 return (float16x4_t) { __f, __f, __f, __f };
14884 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14885 vld1_dup_f32 (const float32_t* __a)
14887 return vdup_n_f32 (*__a);
14890 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14891 vld1_dup_f64 (const float64_t* __a)
14893 return vdup_n_f64 (*__a);
14896 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14897 vld1_dup_p8 (const poly8_t* __a)
14899 return vdup_n_p8 (*__a);
14902 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14903 vld1_dup_p16 (const poly16_t* __a)
14905 return vdup_n_p16 (*__a);
14908 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14909 vld1_dup_s8 (const int8_t* __a)
14911 return vdup_n_s8 (*__a);
14914 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14915 vld1_dup_s16 (const int16_t* __a)
14917 return vdup_n_s16 (*__a);
14920 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14921 vld1_dup_s32 (const int32_t* __a)
14923 return vdup_n_s32 (*__a);
14926 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14927 vld1_dup_s64 (const int64_t* __a)
14929 return vdup_n_s64 (*__a);
14932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14933 vld1_dup_u8 (const uint8_t* __a)
14935 return vdup_n_u8 (*__a);
14938 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14939 vld1_dup_u16 (const uint16_t* __a)
14941 return vdup_n_u16 (*__a);
14944 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14945 vld1_dup_u32 (const uint32_t* __a)
14947 return vdup_n_u32 (*__a);
14950 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14951 vld1_dup_u64 (const uint64_t* __a)
14953 return vdup_n_u64 (*__a);
14956 /* vld1q_dup */
14958 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14959 vld1q_dup_f16 (const float16_t* __a)
14961 float16_t __f = *__a;
14962 return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f };
14965 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14966 vld1q_dup_f32 (const float32_t* __a)
14968 return vdupq_n_f32 (*__a);
14971 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14972 vld1q_dup_f64 (const float64_t* __a)
14974 return vdupq_n_f64 (*__a);
14977 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14978 vld1q_dup_p8 (const poly8_t* __a)
14980 return vdupq_n_p8 (*__a);
14983 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14984 vld1q_dup_p16 (const poly16_t* __a)
14986 return vdupq_n_p16 (*__a);
14989 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14990 vld1q_dup_s8 (const int8_t* __a)
14992 return vdupq_n_s8 (*__a);
14995 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14996 vld1q_dup_s16 (const int16_t* __a)
14998 return vdupq_n_s16 (*__a);
15001 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15002 vld1q_dup_s32 (const int32_t* __a)
15004 return vdupq_n_s32 (*__a);
15007 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15008 vld1q_dup_s64 (const int64_t* __a)
15010 return vdupq_n_s64 (*__a);
15013 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15014 vld1q_dup_u8 (const uint8_t* __a)
15016 return vdupq_n_u8 (*__a);
15019 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15020 vld1q_dup_u16 (const uint16_t* __a)
15022 return vdupq_n_u16 (*__a);
15025 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15026 vld1q_dup_u32 (const uint32_t* __a)
15028 return vdupq_n_u32 (*__a);
15031 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15032 vld1q_dup_u64 (const uint64_t* __a)
15034 return vdupq_n_u64 (*__a);
15037 /* vld1_lane */
15039 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15040 vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane)
15042 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15045 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15046 vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
15048 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15051 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15052 vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
15054 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15057 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15058 vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
15060 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15063 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15064 vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
15066 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15069 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15070 vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
15072 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15075 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15076 vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
15078 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15081 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15082 vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
15084 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15087 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15088 vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
15090 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15093 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15094 vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
15096 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15099 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15100 vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
15102 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15105 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15106 vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
15108 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15111 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15112 vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
15114 return __aarch64_vset_lane_any (*__src, __vec, __lane);
/* vld1q_lane */
15119 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15120 vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane)
15122 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15125 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15126 vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
15128 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15131 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15132 vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
15134 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15137 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15138 vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
15140 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15143 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15144 vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
15146 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15150 vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
15152 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15155 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15156 vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
15158 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15161 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15162 vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
15164 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15167 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15168 vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
15170 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15173 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15174 vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
15176 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15179 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15180 vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
15182 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15185 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15186 vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
15188 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15191 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15192 vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
15194 return __aarch64_vset_lane_any (*__src, __vec, __lane);
/* vldn */
15199 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
15200 vld2_s64 (const int64_t * __a)
15202 int64x1x2_t ret;
15203 __builtin_aarch64_simd_oi __o;
15204 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15205 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15206 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15207 return ret;
15210 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
15211 vld2_u64 (const uint64_t * __a)
15213 uint64x1x2_t ret;
15214 __builtin_aarch64_simd_oi __o;
15215 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15216 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15217 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15218 return ret;
15221 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
15222 vld2_f64 (const float64_t * __a)
15224 float64x1x2_t ret;
15225 __builtin_aarch64_simd_oi __o;
15226 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
15227 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
15228 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
15229 return ret;
15232 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
15233 vld2_s8 (const int8_t * __a)
15235 int8x8x2_t ret;
15236 __builtin_aarch64_simd_oi __o;
15237 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15238 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15239 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15240 return ret;
15243 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
15244 vld2_p8 (const poly8_t * __a)
15246 poly8x8x2_t ret;
15247 __builtin_aarch64_simd_oi __o;
15248 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15249 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15250 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15251 return ret;
15254 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
15255 vld2_s16 (const int16_t * __a)
15257 int16x4x2_t ret;
15258 __builtin_aarch64_simd_oi __o;
15259 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15260 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15261 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15262 return ret;
15265 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
15266 vld2_p16 (const poly16_t * __a)
15268 poly16x4x2_t ret;
15269 __builtin_aarch64_simd_oi __o;
15270 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15271 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15272 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15273 return ret;
15276 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
15277 vld2_s32 (const int32_t * __a)
15279 int32x2x2_t ret;
15280 __builtin_aarch64_simd_oi __o;
15281 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15282 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15283 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15284 return ret;
15287 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
15288 vld2_u8 (const uint8_t * __a)
15290 uint8x8x2_t ret;
15291 __builtin_aarch64_simd_oi __o;
15292 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15293 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15294 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15295 return ret;
15298 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
15299 vld2_u16 (const uint16_t * __a)
15301 uint16x4x2_t ret;
15302 __builtin_aarch64_simd_oi __o;
15303 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15304 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15305 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15306 return ret;
15309 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
15310 vld2_u32 (const uint32_t * __a)
15312 uint32x2x2_t ret;
15313 __builtin_aarch64_simd_oi __o;
15314 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15315 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15316 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15317 return ret;
15320 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
15321 vld2_f16 (const float16_t * __a)
15323 float16x4x2_t ret;
15324 __builtin_aarch64_simd_oi __o;
15325 __o = __builtin_aarch64_ld2v4hf (__a);
15326 ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
15327 ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
15328 return ret;
15331 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
15332 vld2_f32 (const float32_t * __a)
15334 float32x2x2_t ret;
15335 __builtin_aarch64_simd_oi __o;
15336 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
15337 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
15338 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
15339 return ret;
15342 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
15343 vld2q_s8 (const int8_t * __a)
15345 int8x16x2_t ret;
15346 __builtin_aarch64_simd_oi __o;
15347 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15348 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15349 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15350 return ret;
15353 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
15354 vld2q_p8 (const poly8_t * __a)
15356 poly8x16x2_t ret;
15357 __builtin_aarch64_simd_oi __o;
15358 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15359 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15360 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15361 return ret;
15364 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
15365 vld2q_s16 (const int16_t * __a)
15367 int16x8x2_t ret;
15368 __builtin_aarch64_simd_oi __o;
15369 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15370 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15371 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15372 return ret;
15375 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
15376 vld2q_p16 (const poly16_t * __a)
15378 poly16x8x2_t ret;
15379 __builtin_aarch64_simd_oi __o;
15380 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15381 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15382 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15383 return ret;
15386 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
15387 vld2q_s32 (const int32_t * __a)
15389 int32x4x2_t ret;
15390 __builtin_aarch64_simd_oi __o;
15391 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15392 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15393 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15394 return ret;
15397 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
15398 vld2q_s64 (const int64_t * __a)
15400 int64x2x2_t ret;
15401 __builtin_aarch64_simd_oi __o;
15402 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15403 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15404 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15405 return ret;
15408 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
15409 vld2q_u8 (const uint8_t * __a)
15411 uint8x16x2_t ret;
15412 __builtin_aarch64_simd_oi __o;
15413 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15414 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15415 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15416 return ret;
15419 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
15420 vld2q_u16 (const uint16_t * __a)
15422 uint16x8x2_t ret;
15423 __builtin_aarch64_simd_oi __o;
15424 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15425 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15426 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15427 return ret;
15430 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
15431 vld2q_u32 (const uint32_t * __a)
15433 uint32x4x2_t ret;
15434 __builtin_aarch64_simd_oi __o;
15435 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15436 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15437 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15438 return ret;
15441 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
15442 vld2q_u64 (const uint64_t * __a)
15444 uint64x2x2_t ret;
15445 __builtin_aarch64_simd_oi __o;
15446 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15447 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15448 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15449 return ret;
15452 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
15453 vld2q_f16 (const float16_t * __a)
15455 float16x8x2_t ret;
15456 __builtin_aarch64_simd_oi __o;
15457 __o = __builtin_aarch64_ld2v8hf (__a);
15458 ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
15459 ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
15460 return ret;
15463 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
15464 vld2q_f32 (const float32_t * __a)
15466 float32x4x2_t ret;
15467 __builtin_aarch64_simd_oi __o;
15468 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
15469 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
15470 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
15471 return ret;
15474 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
15475 vld2q_f64 (const float64_t * __a)
15477 float64x2x2_t ret;
15478 __builtin_aarch64_simd_oi __o;
15479 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
15480 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
15481 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
15482 return ret;
15485 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
15486 vld3_s64 (const int64_t * __a)
15488 int64x1x3_t ret;
15489 __builtin_aarch64_simd_ci __o;
15490 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
15491 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
15492 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
15493 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
15494 return ret;
15497 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
15498 vld3_u64 (const uint64_t * __a)
15500 uint64x1x3_t ret;
15501 __builtin_aarch64_simd_ci __o;
15502 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
15503 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
15504 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
15505 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
15506 return ret;
15509 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
15510 vld3_f64 (const float64_t * __a)
15512 float64x1x3_t ret;
15513 __builtin_aarch64_simd_ci __o;
15514 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
15515 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
15516 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
15517 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
15518 return ret;
15521 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
15522 vld3_s8 (const int8_t * __a)
15524 int8x8x3_t ret;
15525 __builtin_aarch64_simd_ci __o;
15526 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15527 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15528 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15529 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15530 return ret;
15533 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
15534 vld3_p8 (const poly8_t * __a)
15536 poly8x8x3_t ret;
15537 __builtin_aarch64_simd_ci __o;
15538 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15539 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15540 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15541 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15542 return ret;
15545 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
15546 vld3_s16 (const int16_t * __a)
15548 int16x4x3_t ret;
15549 __builtin_aarch64_simd_ci __o;
15550 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15551 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15552 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15553 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15554 return ret;
15557 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
15558 vld3_p16 (const poly16_t * __a)
15560 poly16x4x3_t ret;
15561 __builtin_aarch64_simd_ci __o;
15562 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15563 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15564 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15565 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15566 return ret;
15569 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
15570 vld3_s32 (const int32_t * __a)
15572 int32x2x3_t ret;
15573 __builtin_aarch64_simd_ci __o;
15574 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
15575 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
15576 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
15577 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
15578 return ret;
15581 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
15582 vld3_u8 (const uint8_t * __a)
15584 uint8x8x3_t ret;
15585 __builtin_aarch64_simd_ci __o;
15586 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15587 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15588 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15589 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15590 return ret;
15593 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
15594 vld3_u16 (const uint16_t * __a)
15596 uint16x4x3_t ret;
15597 __builtin_aarch64_simd_ci __o;
15598 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15599 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15600 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15601 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15602 return ret;
15605 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
15606 vld3_u32 (const uint32_t * __a)
15608 uint32x2x3_t ret;
15609 __builtin_aarch64_simd_ci __o;
15610 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
15611 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
15612 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
15613 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
15614 return ret;
15617 __extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
15618 vld3_f16 (const float16_t * __a)
15620 float16x4x3_t ret;
15621 __builtin_aarch64_simd_ci __o;
15622 __o = __builtin_aarch64_ld3v4hf (__a);
15623 ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
15624 ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
15625 ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
15626 return ret;
15629 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
15630 vld3_f32 (const float32_t * __a)
15632 float32x2x3_t ret;
15633 __builtin_aarch64_simd_ci __o;
15634 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
15635 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
15636 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
15637 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
15638 return ret;
15641 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
15642 vld3q_s8 (const int8_t * __a)
15644 int8x16x3_t ret;
15645 __builtin_aarch64_simd_ci __o;
15646 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15647 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15648 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15649 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15650 return ret;
15653 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
15654 vld3q_p8 (const poly8_t * __a)
15656 poly8x16x3_t ret;
15657 __builtin_aarch64_simd_ci __o;
15658 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15659 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15660 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15661 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15662 return ret;
15665 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
15666 vld3q_s16 (const int16_t * __a)
15668 int16x8x3_t ret;
15669 __builtin_aarch64_simd_ci __o;
15670 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15671 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15672 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15673 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15674 return ret;
15677 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
15678 vld3q_p16 (const poly16_t * __a)
15680 poly16x8x3_t ret;
15681 __builtin_aarch64_simd_ci __o;
15682 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15683 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15684 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15685 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15686 return ret;
15689 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
15690 vld3q_s32 (const int32_t * __a)
15692 int32x4x3_t ret;
15693 __builtin_aarch64_simd_ci __o;
15694 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
15695 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
15696 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
15697 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
15698 return ret;
15701 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
15702 vld3q_s64 (const int64_t * __a)
15704 int64x2x3_t ret;
15705 __builtin_aarch64_simd_ci __o;
15706 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
15707 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
15708 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
15709 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
15710 return ret;
15713 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
15714 vld3q_u8 (const uint8_t * __a)
15716 uint8x16x3_t ret;
15717 __builtin_aarch64_simd_ci __o;
15718 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15719 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15720 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15721 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15722 return ret;
15725 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
15726 vld3q_u16 (const uint16_t * __a)
15728 uint16x8x3_t ret;
15729 __builtin_aarch64_simd_ci __o;
15730 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15731 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15732 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15733 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15734 return ret;
15737 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
15738 vld3q_u32 (const uint32_t * __a)
15740 uint32x4x3_t ret;
15741 __builtin_aarch64_simd_ci __o;
15742 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
15743 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
15744 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
15745 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
15746 return ret;
15749 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
15750 vld3q_u64 (const uint64_t * __a)
15752 uint64x2x3_t ret;
15753 __builtin_aarch64_simd_ci __o;
15754 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
15755 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
15756 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
15757 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
15758 return ret;
15761 __extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
15762 vld3q_f16 (const float16_t * __a)
15764 float16x8x3_t ret;
15765 __builtin_aarch64_simd_ci __o;
15766 __o = __builtin_aarch64_ld3v8hf (__a);
15767 ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
15768 ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
15769 ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
15770 return ret;
15773 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
15774 vld3q_f32 (const float32_t * __a)
15776 float32x4x3_t ret;
15777 __builtin_aarch64_simd_ci __o;
15778 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
15779 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
15780 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
15781 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
15782 return ret;
15785 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
15786 vld3q_f64 (const float64_t * __a)
15788 float64x2x3_t ret;
15789 __builtin_aarch64_simd_ci __o;
15790 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
15791 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
15792 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
15793 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
15794 return ret;
15797 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
15798 vld4_s64 (const int64_t * __a)
15800 int64x1x4_t ret;
15801 __builtin_aarch64_simd_xi __o;
15802 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
15803 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
15804 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
15805 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
15806 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
15807 return ret;
15810 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
15811 vld4_u64 (const uint64_t * __a)
15813 uint64x1x4_t ret;
15814 __builtin_aarch64_simd_xi __o;
15815 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
15816 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
15817 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
15818 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
15819 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
15820 return ret;
15823 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
15824 vld4_f64 (const float64_t * __a)
15826 float64x1x4_t ret;
15827 __builtin_aarch64_simd_xi __o;
15828 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
15829 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
15830 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
15831 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
15832 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
15833 return ret;
15836 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
15837 vld4_s8 (const int8_t * __a)
15839 int8x8x4_t ret;
15840 __builtin_aarch64_simd_xi __o;
15841 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15842 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15843 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15844 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15845 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15846 return ret;
15849 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
15850 vld4_p8 (const poly8_t * __a)
15852 poly8x8x4_t ret;
15853 __builtin_aarch64_simd_xi __o;
15854 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15855 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15856 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15857 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15858 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15859 return ret;
15862 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
15863 vld4_s16 (const int16_t * __a)
15865 int16x4x4_t ret;
15866 __builtin_aarch64_simd_xi __o;
15867 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15868 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15869 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15870 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15871 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15872 return ret;
15875 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
15876 vld4_p16 (const poly16_t * __a)
15878 poly16x4x4_t ret;
15879 __builtin_aarch64_simd_xi __o;
15880 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15881 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15882 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15883 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15884 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15885 return ret;
15888 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
15889 vld4_s32 (const int32_t * __a)
15891 int32x2x4_t ret;
15892 __builtin_aarch64_simd_xi __o;
15893 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
15894 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
15895 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
15896 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
15897 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
15898 return ret;
15901 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
15902 vld4_u8 (const uint8_t * __a)
15904 uint8x8x4_t ret;
15905 __builtin_aarch64_simd_xi __o;
15906 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15907 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15908 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15909 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15910 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15911 return ret;
15914 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
15915 vld4_u16 (const uint16_t * __a)
15917 uint16x4x4_t ret;
15918 __builtin_aarch64_simd_xi __o;
15919 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15920 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15921 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15922 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15923 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15924 return ret;
15927 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
15928 vld4_u32 (const uint32_t * __a)
15930 uint32x2x4_t ret;
15931 __builtin_aarch64_simd_xi __o;
15932 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
15933 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
15934 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
15935 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
15936 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
15937 return ret;
15940 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
15941 vld4_f16 (const float16_t * __a)
15943 float16x4x4_t ret;
15944 __builtin_aarch64_simd_xi __o;
15945 __o = __builtin_aarch64_ld4v4hf (__a);
15946 ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
15947 ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
15948 ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
15949 ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
15950 return ret;
15953 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
15954 vld4_f32 (const float32_t * __a)
15956 float32x2x4_t ret;
15957 __builtin_aarch64_simd_xi __o;
15958 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
15959 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
15960 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
15961 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
15962 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
15963 return ret;
15966 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
15967 vld4q_s8 (const int8_t * __a)
15969 int8x16x4_t ret;
15970 __builtin_aarch64_simd_xi __o;
15971 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
15972 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
15973 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
15974 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
15975 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
15976 return ret;
15979 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
15980 vld4q_p8 (const poly8_t * __a)
15982 poly8x16x4_t ret;
15983 __builtin_aarch64_simd_xi __o;
15984 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
15985 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
15986 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
15987 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
15988 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
15989 return ret;
15992 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
15993 vld4q_s16 (const int16_t * __a)
15995 int16x8x4_t ret;
15996 __builtin_aarch64_simd_xi __o;
15997 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
15998 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
15999 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16000 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16001 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16002 return ret;
16005 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16006 vld4q_p16 (const poly16_t * __a)
16008 poly16x8x4_t ret;
16009 __builtin_aarch64_simd_xi __o;
16010 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16011 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16012 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16013 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16014 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16015 return ret;
16018 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16019 vld4q_s32 (const int32_t * __a)
16021 int32x4x4_t ret;
16022 __builtin_aarch64_simd_xi __o;
16023 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16024 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16025 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16026 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16027 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16028 return ret;
16031 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16032 vld4q_s64 (const int64_t * __a)
16034 int64x2x4_t ret;
16035 __builtin_aarch64_simd_xi __o;
16036 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16037 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16038 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16039 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16040 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16041 return ret;
16044 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16045 vld4q_u8 (const uint8_t * __a)
16047 uint8x16x4_t ret;
16048 __builtin_aarch64_simd_xi __o;
16049 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16050 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16051 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16052 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16053 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16054 return ret;
16057 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16058 vld4q_u16 (const uint16_t * __a)
16060 uint16x8x4_t ret;
16061 __builtin_aarch64_simd_xi __o;
16062 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16063 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16064 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16065 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16066 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16067 return ret;
16070 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
16071 vld4q_u32 (const uint32_t * __a)
16073 uint32x4x4_t ret;
16074 __builtin_aarch64_simd_xi __o;
16075 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16076 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16077 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16078 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16079 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16080 return ret;
16083 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
16084 vld4q_u64 (const uint64_t * __a)
16086 uint64x2x4_t ret;
16087 __builtin_aarch64_simd_xi __o;
16088 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16089 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16090 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16091 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16092 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16093 return ret;
16096 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
16097 vld4q_f16 (const float16_t * __a)
16099 float16x8x4_t ret;
16100 __builtin_aarch64_simd_xi __o;
16101 __o = __builtin_aarch64_ld4v8hf (__a);
16102 ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
16103 ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
16104 ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
16105 ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
16106 return ret;
16109 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
16110 vld4q_f32 (const float32_t * __a)
16112 float32x4x4_t ret;
16113 __builtin_aarch64_simd_xi __o;
16114 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
16115 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
16116 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
16117 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
16118 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
16119 return ret;
16122 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
16123 vld4q_f64 (const float64_t * __a)
16125 float64x2x4_t ret;
16126 __builtin_aarch64_simd_xi __o;
16127 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
16128 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
16129 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
16130 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
16131 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
16132 return ret;
16135 /* vldn_dup */
16137 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
16138 vld2_dup_s8 (const int8_t * __a)
16140 int8x8x2_t ret;
16141 __builtin_aarch64_simd_oi __o;
16142 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16143 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16144 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16145 return ret;
16148 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
16149 vld2_dup_s16 (const int16_t * __a)
16151 int16x4x2_t ret;
16152 __builtin_aarch64_simd_oi __o;
16153 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16154 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16155 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16156 return ret;
16159 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
16160 vld2_dup_s32 (const int32_t * __a)
16162 int32x2x2_t ret;
16163 __builtin_aarch64_simd_oi __o;
16164 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16165 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16166 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16167 return ret;
16170 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
16171 vld2_dup_f16 (const float16_t * __a)
16173 float16x4x2_t ret;
16174 __builtin_aarch64_simd_oi __o;
16175 __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16176 ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
16177 ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
16178 return ret;
16181 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
16182 vld2_dup_f32 (const float32_t * __a)
16184 float32x2x2_t ret;
16185 __builtin_aarch64_simd_oi __o;
16186 __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16187 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
16188 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
16189 return ret;
16192 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
16193 vld2_dup_f64 (const float64_t * __a)
16195 float64x1x2_t ret;
16196 __builtin_aarch64_simd_oi __o;
16197 __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
16198 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
16199 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
16200 return ret;
16203 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
16204 vld2_dup_u8 (const uint8_t * __a)
16206 uint8x8x2_t ret;
16207 __builtin_aarch64_simd_oi __o;
16208 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16209 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16210 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16211 return ret;
16214 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
16215 vld2_dup_u16 (const uint16_t * __a)
16217 uint16x4x2_t ret;
16218 __builtin_aarch64_simd_oi __o;
16219 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16220 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16221 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16222 return ret;
16225 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
16226 vld2_dup_u32 (const uint32_t * __a)
16228 uint32x2x2_t ret;
16229 __builtin_aarch64_simd_oi __o;
16230 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16231 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16232 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16233 return ret;
16236 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
16237 vld2_dup_p8 (const poly8_t * __a)
16239 poly8x8x2_t ret;
16240 __builtin_aarch64_simd_oi __o;
16241 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16242 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16243 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16244 return ret;
16247 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
16248 vld2_dup_p16 (const poly16_t * __a)
16250 poly16x4x2_t ret;
16251 __builtin_aarch64_simd_oi __o;
16252 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16253 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16254 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16255 return ret;
16258 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
16259 vld2_dup_s64 (const int64_t * __a)
16261 int64x1x2_t ret;
16262 __builtin_aarch64_simd_oi __o;
16263 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16264 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16265 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16266 return ret;
16269 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
16270 vld2_dup_u64 (const uint64_t * __a)
16272 uint64x1x2_t ret;
16273 __builtin_aarch64_simd_oi __o;
16274 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16275 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16276 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16277 return ret;
16280 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
16281 vld2q_dup_s8 (const int8_t * __a)
16283 int8x16x2_t ret;
16284 __builtin_aarch64_simd_oi __o;
16285 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16286 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16287 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16288 return ret;
16291 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
16292 vld2q_dup_p8 (const poly8_t * __a)
16294 poly8x16x2_t ret;
16295 __builtin_aarch64_simd_oi __o;
16296 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16297 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16298 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16299 return ret;
16302 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
16303 vld2q_dup_s16 (const int16_t * __a)
16305 int16x8x2_t ret;
16306 __builtin_aarch64_simd_oi __o;
16307 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16308 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16309 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16310 return ret;
16313 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
16314 vld2q_dup_p16 (const poly16_t * __a)
16316 poly16x8x2_t ret;
16317 __builtin_aarch64_simd_oi __o;
16318 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16319 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16320 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16321 return ret;
16324 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
16325 vld2q_dup_s32 (const int32_t * __a)
16327 int32x4x2_t ret;
16328 __builtin_aarch64_simd_oi __o;
16329 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16330 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16331 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16332 return ret;
16335 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16336 vld2q_dup_s64 (const int64_t * __a)
16338 int64x2x2_t ret;
16339 __builtin_aarch64_simd_oi __o;
16340 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16341 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16342 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16343 return ret;
16346 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16347 vld2q_dup_u8 (const uint8_t * __a)
16349 uint8x16x2_t ret;
16350 __builtin_aarch64_simd_oi __o;
16351 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16352 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16353 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16354 return ret;
16357 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16358 vld2q_dup_u16 (const uint16_t * __a)
16360 uint16x8x2_t ret;
16361 __builtin_aarch64_simd_oi __o;
16362 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16363 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16364 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16365 return ret;
16368 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16369 vld2q_dup_u32 (const uint32_t * __a)
16371 uint32x4x2_t ret;
16372 __builtin_aarch64_simd_oi __o;
16373 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16374 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16375 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16376 return ret;
16379 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16380 vld2q_dup_u64 (const uint64_t * __a)
16382 uint64x2x2_t ret;
16383 __builtin_aarch64_simd_oi __o;
16384 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16385 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16386 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16387 return ret;
16390 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
16391 vld2q_dup_f16 (const float16_t * __a)
16393 float16x8x2_t ret;
16394 __builtin_aarch64_simd_oi __o;
16395 __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16396 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
16397 ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
16398 return ret;
16401 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16402 vld2q_dup_f32 (const float32_t * __a)
16404 float32x4x2_t ret;
16405 __builtin_aarch64_simd_oi __o;
16406 __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16407 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16408 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16409 return ret;
16412 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16413 vld2q_dup_f64 (const float64_t * __a)
16415 float64x2x2_t ret;
16416 __builtin_aarch64_simd_oi __o;
16417 __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
16418 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16419 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16420 return ret;
16423 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16424 vld3_dup_s64 (const int64_t * __a)
16426 int64x1x3_t ret;
16427 __builtin_aarch64_simd_ci __o;
16428 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16429 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16430 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16431 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16432 return ret;
16435 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16436 vld3_dup_u64 (const uint64_t * __a)
16438 uint64x1x3_t ret;
16439 __builtin_aarch64_simd_ci __o;
16440 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16441 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16442 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16443 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16444 return ret;
16447 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16448 vld3_dup_f64 (const float64_t * __a)
16450 float64x1x3_t ret;
16451 __builtin_aarch64_simd_ci __o;
16452 __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
16453 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16454 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16455 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16456 return ret;
16459 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16460 vld3_dup_s8 (const int8_t * __a)
16462 int8x8x3_t ret;
16463 __builtin_aarch64_simd_ci __o;
16464 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16465 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16466 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16467 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16468 return ret;
16471 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16472 vld3_dup_p8 (const poly8_t * __a)
16474 poly8x8x3_t ret;
16475 __builtin_aarch64_simd_ci __o;
16476 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16477 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16478 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16479 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16480 return ret;
16483 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16484 vld3_dup_s16 (const int16_t * __a)
16486 int16x4x3_t ret;
16487 __builtin_aarch64_simd_ci __o;
16488 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16489 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16490 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16491 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16492 return ret;
16495 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16496 vld3_dup_p16 (const poly16_t * __a)
16498 poly16x4x3_t ret;
16499 __builtin_aarch64_simd_ci __o;
16500 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16501 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16502 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16503 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16504 return ret;
16507 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16508 vld3_dup_s32 (const int32_t * __a)
16510 int32x2x3_t ret;
16511 __builtin_aarch64_simd_ci __o;
16512 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16513 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16514 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16515 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16516 return ret;
16519 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16520 vld3_dup_u8 (const uint8_t * __a)
16522 uint8x8x3_t ret;
16523 __builtin_aarch64_simd_ci __o;
16524 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16525 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16526 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16527 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16528 return ret;
16531 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16532 vld3_dup_u16 (const uint16_t * __a)
16534 uint16x4x3_t ret;
16535 __builtin_aarch64_simd_ci __o;
16536 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16537 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16538 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16539 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16540 return ret;
16543 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16544 vld3_dup_u32 (const uint32_t * __a)
16546 uint32x2x3_t ret;
16547 __builtin_aarch64_simd_ci __o;
16548 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16549 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16550 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16551 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16552 return ret;
16555 __extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
16556 vld3_dup_f16 (const float16_t * __a)
16558 float16x4x3_t ret;
16559 __builtin_aarch64_simd_ci __o;
16560 __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16561 ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
16562 ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
16563 ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
16564 return ret;
16567 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16568 vld3_dup_f32 (const float32_t * __a)
16570 float32x2x3_t ret;
16571 __builtin_aarch64_simd_ci __o;
16572 __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16573 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16574 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16575 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16576 return ret;
16579 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16580 vld3q_dup_s8 (const int8_t * __a)
16582 int8x16x3_t ret;
16583 __builtin_aarch64_simd_ci __o;
16584 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16585 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16586 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16587 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16588 return ret;
16591 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16592 vld3q_dup_p8 (const poly8_t * __a)
16594 poly8x16x3_t ret;
16595 __builtin_aarch64_simd_ci __o;
16596 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16597 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16598 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16599 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16600 return ret;
16603 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16604 vld3q_dup_s16 (const int16_t * __a)
16606 int16x8x3_t ret;
16607 __builtin_aarch64_simd_ci __o;
16608 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16609 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16610 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16611 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16612 return ret;
16615 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16616 vld3q_dup_p16 (const poly16_t * __a)
16618 poly16x8x3_t ret;
16619 __builtin_aarch64_simd_ci __o;
16620 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16621 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16622 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16623 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16624 return ret;
16627 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16628 vld3q_dup_s32 (const int32_t * __a)
16630 int32x4x3_t ret;
16631 __builtin_aarch64_simd_ci __o;
16632 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16633 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16634 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16635 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16636 return ret;
16639 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16640 vld3q_dup_s64 (const int64_t * __a)
16642 int64x2x3_t ret;
16643 __builtin_aarch64_simd_ci __o;
16644 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16645 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16646 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16647 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16648 return ret;
16651 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16652 vld3q_dup_u8 (const uint8_t * __a)
16654 uint8x16x3_t ret;
16655 __builtin_aarch64_simd_ci __o;
16656 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16657 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16658 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16659 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16660 return ret;
16663 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16664 vld3q_dup_u16 (const uint16_t * __a)
16666 uint16x8x3_t ret;
16667 __builtin_aarch64_simd_ci __o;
16668 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16669 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16670 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16671 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16672 return ret;
16675 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16676 vld3q_dup_u32 (const uint32_t * __a)
16678 uint32x4x3_t ret;
16679 __builtin_aarch64_simd_ci __o;
16680 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16681 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16682 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16683 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16684 return ret;
16687 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16688 vld3q_dup_u64 (const uint64_t * __a)
16690 uint64x2x3_t ret;
16691 __builtin_aarch64_simd_ci __o;
16692 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16693 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16694 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16695 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16696 return ret;
16699 __extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
16700 vld3q_dup_f16 (const float16_t * __a)
16702 float16x8x3_t ret;
16703 __builtin_aarch64_simd_ci __o;
16704 __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16705 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
16706 ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
16707 ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
16708 return ret;
16711 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
16712 vld3q_dup_f32 (const float32_t * __a)
16714 float32x4x3_t ret;
16715 __builtin_aarch64_simd_ci __o;
16716 __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16717 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
16718 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
16719 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
16720 return ret;
16723 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
16724 vld3q_dup_f64 (const float64_t * __a)
16726 float64x2x3_t ret;
16727 __builtin_aarch64_simd_ci __o;
16728 __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
16729 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
16730 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
16731 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
16732 return ret;
16735 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
16736 vld4_dup_s64 (const int64_t * __a)
16738 int64x1x4_t ret;
16739 __builtin_aarch64_simd_xi __o;
16740 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16741 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16742 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16743 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16744 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16745 return ret;
16748 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
16749 vld4_dup_u64 (const uint64_t * __a)
16751 uint64x1x4_t ret;
16752 __builtin_aarch64_simd_xi __o;
16753 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16754 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16755 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16756 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16757 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16758 return ret;
16761 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
16762 vld4_dup_f64 (const float64_t * __a)
16764 float64x1x4_t ret;
16765 __builtin_aarch64_simd_xi __o;
16766 __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
16767 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
16768 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
16769 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
16770 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
16771 return ret;
16774 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
16775 vld4_dup_s8 (const int8_t * __a)
16777 int8x8x4_t ret;
16778 __builtin_aarch64_simd_xi __o;
16779 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16780 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16781 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16782 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16783 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16784 return ret;
16787 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
16788 vld4_dup_p8 (const poly8_t * __a)
16790 poly8x8x4_t ret;
16791 __builtin_aarch64_simd_xi __o;
16792 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16793 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16794 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16795 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16796 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16797 return ret;
16800 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
16801 vld4_dup_s16 (const int16_t * __a)
16803 int16x4x4_t ret;
16804 __builtin_aarch64_simd_xi __o;
16805 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16806 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16807 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16808 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16809 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16810 return ret;
16813 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
16814 vld4_dup_p16 (const poly16_t * __a)
16816 poly16x4x4_t ret;
16817 __builtin_aarch64_simd_xi __o;
16818 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16819 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16820 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16821 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16822 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16823 return ret;
16826 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
16827 vld4_dup_s32 (const int32_t * __a)
16829 int32x2x4_t ret;
16830 __builtin_aarch64_simd_xi __o;
16831 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16832 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16833 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16834 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16835 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16836 return ret;
16839 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
16840 vld4_dup_u8 (const uint8_t * __a)
16842 uint8x8x4_t ret;
16843 __builtin_aarch64_simd_xi __o;
16844 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16845 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16846 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16847 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16848 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16849 return ret;
16852 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
16853 vld4_dup_u16 (const uint16_t * __a)
16855 uint16x4x4_t ret;
16856 __builtin_aarch64_simd_xi __o;
16857 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16858 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16859 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16860 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16861 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16862 return ret;
16865 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
16866 vld4_dup_u32 (const uint32_t * __a)
16868 uint32x2x4_t ret;
16869 __builtin_aarch64_simd_xi __o;
16870 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16871 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16872 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16873 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16874 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16875 return ret;
16878 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
16879 vld4_dup_f16 (const float16_t * __a)
16881 float16x4x4_t ret;
16882 __builtin_aarch64_simd_xi __o;
16883 __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16884 ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
16885 ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
16886 ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
16887 ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
16888 return ret;
16891 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
16892 vld4_dup_f32 (const float32_t * __a)
16894 float32x2x4_t ret;
16895 __builtin_aarch64_simd_xi __o;
16896 __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16897 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
16898 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
16899 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
16900 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
16901 return ret;
16904 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
16905 vld4q_dup_s8 (const int8_t * __a)
16907 int8x16x4_t ret;
16908 __builtin_aarch64_simd_xi __o;
16909 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16910 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16911 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16912 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16913 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16914 return ret;
16917 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
16918 vld4q_dup_p8 (const poly8_t * __a)
16920 poly8x16x4_t ret;
16921 __builtin_aarch64_simd_xi __o;
16922 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16923 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16924 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16925 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16926 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16927 return ret;
16930 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
16931 vld4q_dup_s16 (const int16_t * __a)
16933 int16x8x4_t ret;
16934 __builtin_aarch64_simd_xi __o;
16935 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16936 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16937 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16938 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16939 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16940 return ret;
16943 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16944 vld4q_dup_p16 (const poly16_t * __a)
16946 poly16x8x4_t ret;
16947 __builtin_aarch64_simd_xi __o;
16948 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16949 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16950 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16951 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16952 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16953 return ret;
16956 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16957 vld4q_dup_s32 (const int32_t * __a)
16959 int32x4x4_t ret;
16960 __builtin_aarch64_simd_xi __o;
16961 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
16962 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16963 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16964 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16965 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16966 return ret;
16969 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16970 vld4q_dup_s64 (const int64_t * __a)
16972 int64x2x4_t ret;
16973 __builtin_aarch64_simd_xi __o;
16974 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
16975 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16976 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16977 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16978 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16979 return ret;
16982 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16983 vld4q_dup_u8 (const uint8_t * __a)
16985 uint8x16x4_t ret;
16986 __builtin_aarch64_simd_xi __o;
16987 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16988 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16989 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16990 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16991 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16992 return ret;
16995 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16996 vld4q_dup_u16 (const uint16_t * __a)
16998 uint16x8x4_t ret;
16999 __builtin_aarch64_simd_xi __o;
17000 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17001 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17002 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17003 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17004 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17005 return ret;
17008 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17009 vld4q_dup_u32 (const uint32_t * __a)
17011 uint32x4x4_t ret;
17012 __builtin_aarch64_simd_xi __o;
17013 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
17014 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17015 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17016 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17017 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17018 return ret;
17021 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17022 vld4q_dup_u64 (const uint64_t * __a)
17024 uint64x2x4_t ret;
17025 __builtin_aarch64_simd_xi __o;
17026 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
17027 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17028 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17029 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17030 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17031 return ret;
17034 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
17035 vld4q_dup_f16 (const float16_t * __a)
17037 float16x8x4_t ret;
17038 __builtin_aarch64_simd_xi __o;
17039 __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
17040 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
17041 ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
17042 ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
17043 ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
17044 return ret;
17047 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17048 vld4q_dup_f32 (const float32_t * __a)
17050 float32x4x4_t ret;
17051 __builtin_aarch64_simd_xi __o;
17052 __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17053 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17054 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17055 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17056 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17057 return ret;
17060 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17061 vld4q_dup_f64 (const float64_t * __a)
17063 float64x2x4_t ret;
17064 __builtin_aarch64_simd_xi __o;
17065 __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
17066 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17067 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17068 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17069 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17070 return ret;
17073 /* vld2_lane */
17075 #define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
17076 qmode, ptrmode, funcsuffix, signedtype) \
17077 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17078 vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17080 __builtin_aarch64_simd_oi __o; \
17081 largetype __temp; \
17082 __temp.val[0] = \
17083 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17084 __temp.val[1] = \
17085 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17086 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
17087 (signedtype) __temp.val[0], \
17088 0); \
17089 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
17090 (signedtype) __temp.val[1], \
17091 1); \
17092 __o = __builtin_aarch64_ld2_lane##mode ( \
17093 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17094 __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \
17095 __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \
17096 return __b; \
17099 __LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
17100 v8hf, hf, f16, float16x8_t)
17101 __LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
17102 sf, f32, float32x4_t)
17103 __LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
17104 df, f64, float64x2_t)
17105 __LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
17106 int8x16_t)
17107 __LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
17108 p16, int16x8_t)
17109 __LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
17110 int8x16_t)
17111 __LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
17112 int16x8_t)
17113 __LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
17114 int32x4_t)
17115 __LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64,
17116 int64x2_t)
17117 __LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
17118 int8x16_t)
17119 __LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi,
17120 u16, int16x8_t)
17121 __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si,
17122 u32, int32x4_t)
17123 __LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
17124 u64, int64x2_t)
17126 #undef __LD2_LANE_FUNC
17128 /* vld2q_lane */
17130 #define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17131 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17132 vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17134 __builtin_aarch64_simd_oi __o; \
17135 intype ret; \
17136 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
17137 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
17138 __o = __builtin_aarch64_ld2_lane##mode ( \
17139 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17140 ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \
17141 ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \
17142 return ret; \
17145 __LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
17146 __LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
17147 __LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
17148 __LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17149 __LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17150 __LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
17151 __LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
17152 __LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
17153 __LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
17154 __LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17155 __LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17156 __LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
17157 __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
17159 #undef __LD2_LANE_FUNC
17161 /* vld3_lane */
17163 #define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
17164 qmode, ptrmode, funcsuffix, signedtype) \
17165 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17166 vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17168 __builtin_aarch64_simd_ci __o; \
17169 largetype __temp; \
17170 __temp.val[0] = \
17171 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17172 __temp.val[1] = \
17173 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17174 __temp.val[2] = \
17175 vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
17176 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17177 (signedtype) __temp.val[0], \
17178 0); \
17179 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17180 (signedtype) __temp.val[1], \
17181 1); \
17182 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17183 (signedtype) __temp.val[2], \
17184 2); \
17185 __o = __builtin_aarch64_ld3_lane##mode ( \
17186 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17187 __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \
17188 __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \
17189 __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \
17190 return __b; \
17193 __LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
17194 v8hf, hf, f16, float16x8_t)
17195 __LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
17196 sf, f32, float32x4_t)
17197 __LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
17198 df, f64, float64x2_t)
17199 __LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
17200 int8x16_t)
17201 __LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
17202 p16, int16x8_t)
17203 __LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
17204 int8x16_t)
17205 __LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
17206 int16x8_t)
17207 __LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
17208 int32x4_t)
17209 __LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64,
17210 int64x2_t)
17211 __LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
17212 int8x16_t)
17213 __LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi,
17214 u16, int16x8_t)
17215 __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si,
17216 u32, int32x4_t)
17217 __LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
17218 u64, int64x2_t)
17220 #undef __LD3_LANE_FUNC
17222 /* vld3q_lane */
17224 #define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17225 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17226 vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17228 __builtin_aarch64_simd_ci __o; \
17229 intype ret; \
17230 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
17231 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
17232 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
17233 __o = __builtin_aarch64_ld3_lane##mode ( \
17234 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17235 ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \
17236 ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \
17237 ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \
17238 return ret; \
17241 __LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
17242 __LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
17243 __LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
17244 __LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17245 __LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17246 __LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
17247 __LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
17248 __LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
17249 __LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
17250 __LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17251 __LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17252 __LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
17253 __LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
17255 #undef __LD3_LANE_FUNC
17257 /* vld4_lane */
17259 #define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
17260 qmode, ptrmode, funcsuffix, signedtype) \
17261 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17262 vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17264 __builtin_aarch64_simd_xi __o; \
17265 largetype __temp; \
17266 __temp.val[0] = \
17267 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17268 __temp.val[1] = \
17269 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17270 __temp.val[2] = \
17271 vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
17272 __temp.val[3] = \
17273 vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \
17274 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17275 (signedtype) __temp.val[0], \
17276 0); \
17277 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17278 (signedtype) __temp.val[1], \
17279 1); \
17280 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17281 (signedtype) __temp.val[2], \
17282 2); \
17283 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17284 (signedtype) __temp.val[3], \
17285 3); \
17286 __o = __builtin_aarch64_ld4_lane##mode ( \
17287 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17288 __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \
17289 __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \
17290 __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \
17291 __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \
17292 return __b; \
17295 /* vld4q_lane */
17297 __LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
17298 v8hf, hf, f16, float16x8_t)
17299 __LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
17300 sf, f32, float32x4_t)
17301 __LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
17302 df, f64, float64x2_t)
17303 __LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
17304 int8x16_t)
17305 __LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
17306 p16, int16x8_t)
17307 __LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
17308 int8x16_t)
17309 __LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
17310 int16x8_t)
17311 __LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
17312 int32x4_t)
17313 __LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64,
17314 int64x2_t)
17315 __LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
17316 int8x16_t)
17317 __LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi,
17318 u16, int16x8_t)
17319 __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si,
17320 u32, int32x4_t)
17321 __LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
17322 u64, int64x2_t)
17324 #undef __LD4_LANE_FUNC
17326 /* vld4q_lane */
17328 #define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17329 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17330 vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17332 __builtin_aarch64_simd_xi __o; \
17333 intype ret; \
17334 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
17335 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
17336 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
17337 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
17338 __o = __builtin_aarch64_ld4_lane##mode ( \
17339 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17340 ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \
17341 ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \
17342 ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \
17343 ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \
17344 return ret; \
17347 __LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
17348 __LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
17349 __LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
17350 __LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17351 __LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17352 __LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
17353 __LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
17354 __LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
17355 __LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
17356 __LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17357 __LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17358 __LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
17359 __LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
17361 #undef __LD4_LANE_FUNC
17363 /* vmax */
17365 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17366 vmax_f32 (float32x2_t __a, float32x2_t __b)
17368 return __builtin_aarch64_smax_nanv2sf (__a, __b);
17371 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17372 vmax_s8 (int8x8_t __a, int8x8_t __b)
17374 return __builtin_aarch64_smaxv8qi (__a, __b);
17377 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17378 vmax_s16 (int16x4_t __a, int16x4_t __b)
17380 return __builtin_aarch64_smaxv4hi (__a, __b);
17383 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17384 vmax_s32 (int32x2_t __a, int32x2_t __b)
17386 return __builtin_aarch64_smaxv2si (__a, __b);
17389 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17390 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17392 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17393 (int8x8_t) __b);
17396 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17397 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17399 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17400 (int16x4_t) __b);
17403 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17404 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17406 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17407 (int32x2_t) __b);
17410 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17411 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17413 return __builtin_aarch64_smax_nanv4sf (__a, __b);
17416 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17417 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
17419 return __builtin_aarch64_smax_nanv2df (__a, __b);
17422 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17423 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
17425 return __builtin_aarch64_smaxv16qi (__a, __b);
17428 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17429 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
17431 return __builtin_aarch64_smaxv8hi (__a, __b);
17434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17435 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
17437 return __builtin_aarch64_smaxv4si (__a, __b);
17440 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17441 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
17443 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
17444 (int8x16_t) __b);
17447 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17448 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
17450 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
17451 (int16x8_t) __b);
17454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17455 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
17457 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
17458 (int32x4_t) __b);
17460 /* vmulx */
17462 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17463 vmulx_f32 (float32x2_t __a, float32x2_t __b)
17465 return __builtin_aarch64_fmulxv2sf (__a, __b);
17468 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17469 vmulxq_f32 (float32x4_t __a, float32x4_t __b)
17471 return __builtin_aarch64_fmulxv4sf (__a, __b);
17474 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17475 vmulx_f64 (float64x1_t __a, float64x1_t __b)
17477 return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])};
17480 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17481 vmulxq_f64 (float64x2_t __a, float64x2_t __b)
17483 return __builtin_aarch64_fmulxv2df (__a, __b);
17486 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17487 vmulxs_f32 (float32_t __a, float32_t __b)
17489 return __builtin_aarch64_fmulxsf (__a, __b);
17492 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17493 vmulxd_f64 (float64_t __a, float64_t __b)
17495 return __builtin_aarch64_fmulxdf (__a, __b);
17498 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17499 vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
17501 return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
17504 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17505 vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane)
17507 return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
17510 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17511 vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
17513 return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
17516 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17517 vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
17519 return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
17522 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17523 vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
17525 return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
17528 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17529 vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
17531 return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
17534 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17535 vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
17537 return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
17540 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17541 vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
17543 return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
17546 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17547 vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
17549 return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17552 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17553 vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
17555 return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17558 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17559 vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
17561 return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17564 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17565 vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
17567 return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17570 /* vpmax */
17572 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17573 vpmax_s8 (int8x8_t a, int8x8_t b)
17575 return __builtin_aarch64_smaxpv8qi (a, b);
17578 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17579 vpmax_s16 (int16x4_t a, int16x4_t b)
17581 return __builtin_aarch64_smaxpv4hi (a, b);
17584 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17585 vpmax_s32 (int32x2_t a, int32x2_t b)
17587 return __builtin_aarch64_smaxpv2si (a, b);
17590 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17591 vpmax_u8 (uint8x8_t a, uint8x8_t b)
17593 return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a,
17594 (int8x8_t) b);
17597 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17598 vpmax_u16 (uint16x4_t a, uint16x4_t b)
17600 return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a,
17601 (int16x4_t) b);
17604 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17605 vpmax_u32 (uint32x2_t a, uint32x2_t b)
17607 return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a,
17608 (int32x2_t) b);
17611 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17612 vpmaxq_s8 (int8x16_t a, int8x16_t b)
17614 return __builtin_aarch64_smaxpv16qi (a, b);
17617 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17618 vpmaxq_s16 (int16x8_t a, int16x8_t b)
17620 return __builtin_aarch64_smaxpv8hi (a, b);
17623 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17624 vpmaxq_s32 (int32x4_t a, int32x4_t b)
17626 return __builtin_aarch64_smaxpv4si (a, b);
17629 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17630 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
17632 return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a,
17633 (int8x16_t) b);
17636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17637 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
17639 return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a,
17640 (int16x8_t) b);
17643 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17644 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
17646 return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a,
17647 (int32x4_t) b);
17650 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17651 vpmax_f32 (float32x2_t a, float32x2_t b)
17653 return __builtin_aarch64_smax_nanpv2sf (a, b);
17656 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17657 vpmaxq_f32 (float32x4_t a, float32x4_t b)
17659 return __builtin_aarch64_smax_nanpv4sf (a, b);
17662 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17663 vpmaxq_f64 (float64x2_t a, float64x2_t b)
17665 return __builtin_aarch64_smax_nanpv2df (a, b);
17668 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17669 vpmaxqd_f64 (float64x2_t a)
17671 return __builtin_aarch64_reduc_smax_nan_scal_v2df (a);
17674 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17675 vpmaxs_f32 (float32x2_t a)
17677 return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a);
17680 /* vpmaxnm */
17682 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17683 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
17685 return __builtin_aarch64_smaxpv2sf (a, b);
17688 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17689 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
17691 return __builtin_aarch64_smaxpv4sf (a, b);
17694 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17695 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
17697 return __builtin_aarch64_smaxpv2df (a, b);
17700 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17701 vpmaxnmqd_f64 (float64x2_t a)
17703 return __builtin_aarch64_reduc_smax_scal_v2df (a);
17706 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17707 vpmaxnms_f32 (float32x2_t a)
17709 return __builtin_aarch64_reduc_smax_scal_v2sf (a);
17712 /* vpmin */
17714 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17715 vpmin_s8 (int8x8_t a, int8x8_t b)
17717 return __builtin_aarch64_sminpv8qi (a, b);
17720 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17721 vpmin_s16 (int16x4_t a, int16x4_t b)
17723 return __builtin_aarch64_sminpv4hi (a, b);
17726 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17727 vpmin_s32 (int32x2_t a, int32x2_t b)
17729 return __builtin_aarch64_sminpv2si (a, b);
17732 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17733 vpmin_u8 (uint8x8_t a, uint8x8_t b)
17735 return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a,
17736 (int8x8_t) b);
17739 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17740 vpmin_u16 (uint16x4_t a, uint16x4_t b)
17742 return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a,
17743 (int16x4_t) b);
17746 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17747 vpmin_u32 (uint32x2_t a, uint32x2_t b)
17749 return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a,
17750 (int32x2_t) b);
17753 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17754 vpminq_s8 (int8x16_t a, int8x16_t b)
17756 return __builtin_aarch64_sminpv16qi (a, b);
17759 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17760 vpminq_s16 (int16x8_t a, int16x8_t b)
17762 return __builtin_aarch64_sminpv8hi (a, b);
17765 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17766 vpminq_s32 (int32x4_t a, int32x4_t b)
17768 return __builtin_aarch64_sminpv4si (a, b);
17771 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17772 vpminq_u8 (uint8x16_t a, uint8x16_t b)
17774 return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a,
17775 (int8x16_t) b);
17778 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17779 vpminq_u16 (uint16x8_t a, uint16x8_t b)
17781 return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a,
17782 (int16x8_t) b);
17785 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17786 vpminq_u32 (uint32x4_t a, uint32x4_t b)
17788 return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a,
17789 (int32x4_t) b);
17792 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17793 vpmin_f32 (float32x2_t a, float32x2_t b)
17795 return __builtin_aarch64_smin_nanpv2sf (a, b);
17798 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17799 vpminq_f32 (float32x4_t a, float32x4_t b)
17801 return __builtin_aarch64_smin_nanpv4sf (a, b);
17804 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17805 vpminq_f64 (float64x2_t a, float64x2_t b)
17807 return __builtin_aarch64_smin_nanpv2df (a, b);
17810 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17811 vpminqd_f64 (float64x2_t a)
17813 return __builtin_aarch64_reduc_smin_nan_scal_v2df (a);
17816 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17817 vpmins_f32 (float32x2_t a)
17819 return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a);
17822 /* vpminnm */
17824 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17825 vpminnm_f32 (float32x2_t a, float32x2_t b)
17827 return __builtin_aarch64_sminpv2sf (a, b);
17830 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17831 vpminnmq_f32 (float32x4_t a, float32x4_t b)
17833 return __builtin_aarch64_sminpv4sf (a, b);
17836 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17837 vpminnmq_f64 (float64x2_t a, float64x2_t b)
17839 return __builtin_aarch64_sminpv2df (a, b);
17842 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17843 vpminnmqd_f64 (float64x2_t a)
17845 return __builtin_aarch64_reduc_smin_scal_v2df (a);
17848 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17849 vpminnms_f32 (float32x2_t a)
17851 return __builtin_aarch64_reduc_smin_scal_v2sf (a);
17854 /* vmaxnm */
17856 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17857 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
17859 return __builtin_aarch64_smaxv2sf (__a, __b);
17862 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17863 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
17865 return __builtin_aarch64_smaxv4sf (__a, __b);
17868 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17869 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
17871 return __builtin_aarch64_smaxv2df (__a, __b);
17874 /* vmaxv */
17876 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17877 vmaxv_f32 (float32x2_t __a)
17879 return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
17882 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17883 vmaxv_s8 (int8x8_t __a)
17885 return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
17888 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17889 vmaxv_s16 (int16x4_t __a)
17891 return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
17894 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17895 vmaxv_s32 (int32x2_t __a)
17897 return __builtin_aarch64_reduc_smax_scal_v2si (__a);
17900 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17901 vmaxv_u8 (uint8x8_t __a)
17903 return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
17906 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17907 vmaxv_u16 (uint16x4_t __a)
17909 return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
17912 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17913 vmaxv_u32 (uint32x2_t __a)
17915 return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
17918 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17919 vmaxvq_f32 (float32x4_t __a)
17921 return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
17924 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17925 vmaxvq_f64 (float64x2_t __a)
17927 return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
17930 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17931 vmaxvq_s8 (int8x16_t __a)
17933 return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
17936 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17937 vmaxvq_s16 (int16x8_t __a)
17939 return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
17942 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17943 vmaxvq_s32 (int32x4_t __a)
17945 return __builtin_aarch64_reduc_smax_scal_v4si (__a);
17948 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17949 vmaxvq_u8 (uint8x16_t __a)
17951 return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
17954 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17955 vmaxvq_u16 (uint16x8_t __a)
17957 return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
17960 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17961 vmaxvq_u32 (uint32x4_t __a)
17963 return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
17966 /* vmaxnmv */
17968 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17969 vmaxnmv_f32 (float32x2_t __a)
17971 return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
17974 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17975 vmaxnmvq_f32 (float32x4_t __a)
17977 return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
17980 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17981 vmaxnmvq_f64 (float64x2_t __a)
17983 return __builtin_aarch64_reduc_smax_scal_v2df (__a);
17986 /* vmin */
17988 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17989 vmin_f32 (float32x2_t __a, float32x2_t __b)
17991 return __builtin_aarch64_smin_nanv2sf (__a, __b);
17994 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17995 vmin_s8 (int8x8_t __a, int8x8_t __b)
17997 return __builtin_aarch64_sminv8qi (__a, __b);
18000 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18001 vmin_s16 (int16x4_t __a, int16x4_t __b)
18003 return __builtin_aarch64_sminv4hi (__a, __b);
18006 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18007 vmin_s32 (int32x2_t __a, int32x2_t __b)
18009 return __builtin_aarch64_sminv2si (__a, __b);
18012 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18013 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
18015 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
18016 (int8x8_t) __b);
18019 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18020 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
18022 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
18023 (int16x4_t) __b);
18026 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18027 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
18029 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
18030 (int32x2_t) __b);
18033 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18034 vminq_f32 (float32x4_t __a, float32x4_t __b)
18036 return __builtin_aarch64_smin_nanv4sf (__a, __b);
18039 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18040 vminq_f64 (float64x2_t __a, float64x2_t __b)
18042 return __builtin_aarch64_smin_nanv2df (__a, __b);
18045 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18046 vminq_s8 (int8x16_t __a, int8x16_t __b)
18048 return __builtin_aarch64_sminv16qi (__a, __b);
18051 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18052 vminq_s16 (int16x8_t __a, int16x8_t __b)
18054 return __builtin_aarch64_sminv8hi (__a, __b);
18057 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18058 vminq_s32 (int32x4_t __a, int32x4_t __b)
18060 return __builtin_aarch64_sminv4si (__a, __b);
18063 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18064 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
18066 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
18067 (int8x16_t) __b);
18070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18071 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
18073 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
18074 (int16x8_t) __b);
18077 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18078 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
18080 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
18081 (int32x4_t) __b);
18084 /* vminnm */
18086 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18087 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18089 return __builtin_aarch64_sminv2sf (__a, __b);
18092 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18093 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18095 return __builtin_aarch64_sminv4sf (__a, __b);
18098 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18099 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18101 return __builtin_aarch64_sminv2df (__a, __b);
18104 /* vminv */
18106 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18107 vminv_f32 (float32x2_t __a)
18109 return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
18112 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18113 vminv_s8 (int8x8_t __a)
18115 return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
18118 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18119 vminv_s16 (int16x4_t __a)
18121 return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
18124 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18125 vminv_s32 (int32x2_t __a)
18127 return __builtin_aarch64_reduc_smin_scal_v2si (__a);
18130 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18131 vminv_u8 (uint8x8_t __a)
18133 return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
18136 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18137 vminv_u16 (uint16x4_t __a)
18139 return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
18142 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18143 vminv_u32 (uint32x2_t __a)
18145 return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
18148 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18149 vminvq_f32 (float32x4_t __a)
18151 return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
18154 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18155 vminvq_f64 (float64x2_t __a)
18157 return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
18160 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18161 vminvq_s8 (int8x16_t __a)
18163 return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
18166 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18167 vminvq_s16 (int16x8_t __a)
18169 return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
18172 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18173 vminvq_s32 (int32x4_t __a)
18175 return __builtin_aarch64_reduc_smin_scal_v4si (__a);
18178 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18179 vminvq_u8 (uint8x16_t __a)
18181 return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
18184 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18185 vminvq_u16 (uint16x8_t __a)
18187 return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
18190 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18191 vminvq_u32 (uint32x4_t __a)
18193 return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
18196 /* vminnmv */
18198 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18199 vminnmv_f32 (float32x2_t __a)
18201 return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
18204 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18205 vminnmvq_f32 (float32x4_t __a)
18207 return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
18210 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18211 vminnmvq_f64 (float64x2_t __a)
18213 return __builtin_aarch64_reduc_smin_scal_v2df (__a);
18216 /* vmla */
18218 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18219 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18221 return a + b * c;
18224 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18225 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18227 return __a + __b * __c;
18230 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18231 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18233 return a + b * c;
18236 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18237 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18239 return a + b * c;
18242 /* vmla_lane */
18244 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18245 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18246 float32x2_t __c, const int __lane)
18248 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18251 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18252 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18253 int16x4_t __c, const int __lane)
18255 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18258 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18259 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18260 int32x2_t __c, const int __lane)
18262 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18265 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18266 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18267 uint16x4_t __c, const int __lane)
18269 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18272 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18273 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18274 uint32x2_t __c, const int __lane)
18276 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18279 /* vmla_laneq */
18281 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18282 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18283 float32x4_t __c, const int __lane)
18285 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18288 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18289 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18290 int16x8_t __c, const int __lane)
18292 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18295 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18296 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18297 int32x4_t __c, const int __lane)
18299 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18302 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18303 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18304 uint16x8_t __c, const int __lane)
18306 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18309 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18310 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18311 uint32x4_t __c, const int __lane)
18313 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18316 /* vmlaq_lane */
18318 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18319 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18320 float32x2_t __c, const int __lane)
18322 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18325 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18326 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18327 int16x4_t __c, const int __lane)
18329 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18332 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18333 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18334 int32x2_t __c, const int __lane)
18336 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18339 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18340 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18341 uint16x4_t __c, const int __lane)
18343 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18346 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18347 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18348 uint32x2_t __c, const int __lane)
18350 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18353 /* vmlaq_laneq */
18355 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18356 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18357 float32x4_t __c, const int __lane)
18359 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18363 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18364 int16x8_t __c, const int __lane)
18366 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18370 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18371 int32x4_t __c, const int __lane)
18373 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18376 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18377 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18378 uint16x8_t __c, const int __lane)
18380 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18383 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18384 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18385 uint32x4_t __c, const int __lane)
18387 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18390 /* vmls */
18392 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18393 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18395 return a - b * c;
18398 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18399 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18401 return __a - __b * __c;
18404 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18405 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18407 return a - b * c;
18410 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18411 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18413 return a - b * c;
18416 /* vmls_lane */
18418 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18419 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18420 float32x2_t __c, const int __lane)
18422 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18425 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18426 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18427 int16x4_t __c, const int __lane)
18429 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18432 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18433 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18434 int32x2_t __c, const int __lane)
18436 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18439 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18440 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18441 uint16x4_t __c, const int __lane)
18443 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18446 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18447 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18448 uint32x2_t __c, const int __lane)
18450 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18453 /* vmls_laneq */
18455 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18456 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18457 float32x4_t __c, const int __lane)
18459 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18462 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18463 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18464 int16x8_t __c, const int __lane)
18466 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18469 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18470 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18471 int32x4_t __c, const int __lane)
18473 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18476 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18477 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18478 uint16x8_t __c, const int __lane)
18480 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18483 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18484 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18485 uint32x4_t __c, const int __lane)
18487 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18490 /* vmlsq_lane */
18492 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18493 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18494 float32x2_t __c, const int __lane)
18496 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18499 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18500 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18501 int16x4_t __c, const int __lane)
18503 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18506 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18507 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18508 int32x2_t __c, const int __lane)
18510 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18513 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18514 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18515 uint16x4_t __c, const int __lane)
18517 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18521 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18522 uint32x2_t __c, const int __lane)
18524 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18527 /* vmlsq_laneq */
18529 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18530 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18531 float32x4_t __c, const int __lane)
18533 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18536 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18537 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18538 int16x8_t __c, const int __lane)
18540 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18543 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18544 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18545 int32x4_t __c, const int __lane)
18547 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18549 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18550 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18551 uint16x8_t __c, const int __lane)
18553 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18556 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18557 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18558 uint32x4_t __c, const int __lane)
18560 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18563 /* vmov_n_ */
18565 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18566 vmov_n_f32 (float32_t __a)
18568 return vdup_n_f32 (__a);
18571 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18572 vmov_n_f64 (float64_t __a)
18574 return (float64x1_t) {__a};
18577 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18578 vmov_n_p8 (poly8_t __a)
18580 return vdup_n_p8 (__a);
18583 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18584 vmov_n_p16 (poly16_t __a)
18586 return vdup_n_p16 (__a);
18589 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18590 vmov_n_s8 (int8_t __a)
18592 return vdup_n_s8 (__a);
18595 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18596 vmov_n_s16 (int16_t __a)
18598 return vdup_n_s16 (__a);
18601 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18602 vmov_n_s32 (int32_t __a)
18604 return vdup_n_s32 (__a);
18607 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18608 vmov_n_s64 (int64_t __a)
18610 return (int64x1_t) {__a};
18613 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18614 vmov_n_u8 (uint8_t __a)
18616 return vdup_n_u8 (__a);
18619 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18620 vmov_n_u16 (uint16_t __a)
18622 return vdup_n_u16 (__a);
18625 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18626 vmov_n_u32 (uint32_t __a)
18628 return vdup_n_u32 (__a);
18631 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18632 vmov_n_u64 (uint64_t __a)
18634 return (uint64x1_t) {__a};
18637 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18638 vmovq_n_f32 (float32_t __a)
18640 return vdupq_n_f32 (__a);
18643 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18644 vmovq_n_f64 (float64_t __a)
18646 return vdupq_n_f64 (__a);
18649 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18650 vmovq_n_p8 (poly8_t __a)
18652 return vdupq_n_p8 (__a);
18655 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18656 vmovq_n_p16 (poly16_t __a)
18658 return vdupq_n_p16 (__a);
18661 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18662 vmovq_n_s8 (int8_t __a)
18664 return vdupq_n_s8 (__a);
18667 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18668 vmovq_n_s16 (int16_t __a)
18670 return vdupq_n_s16 (__a);
18673 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18674 vmovq_n_s32 (int32_t __a)
18676 return vdupq_n_s32 (__a);
18679 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18680 vmovq_n_s64 (int64_t __a)
18682 return vdupq_n_s64 (__a);
18685 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18686 vmovq_n_u8 (uint8_t __a)
18688 return vdupq_n_u8 (__a);
18691 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18692 vmovq_n_u16 (uint16_t __a)
18694 return vdupq_n_u16 (__a);
18697 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18698 vmovq_n_u32 (uint32_t __a)
18700 return vdupq_n_u32 (__a);
18703 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18704 vmovq_n_u64 (uint64_t __a)
18706 return vdupq_n_u64 (__a);
18709 /* vmul_lane */
18711 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18712 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18714 return __a * __aarch64_vget_lane_any (__b, __lane);
18717 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18718 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18720 return __a * __b;
18723 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18724 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18726 return __a * __aarch64_vget_lane_any (__b, __lane);
18729 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18730 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18732 return __a * __aarch64_vget_lane_any (__b, __lane);
18735 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18736 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18738 return __a * __aarch64_vget_lane_any (__b, __lane);
18741 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18742 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18744 return __a * __aarch64_vget_lane_any (__b, __lane);
18747 /* vmuld_lane */
18749 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18750 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18752 return __a * __aarch64_vget_lane_any (__b, __lane);
18755 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18756 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18758 return __a * __aarch64_vget_lane_any (__b, __lane);
18761 /* vmuls_lane */
18763 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18764 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18766 return __a * __aarch64_vget_lane_any (__b, __lane);
18769 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18770 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18772 return __a * __aarch64_vget_lane_any (__b, __lane);
18775 /* vmul_laneq */
18777 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18778 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
18780 return __a * __aarch64_vget_lane_any (__b, __lane);
18783 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18784 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
18786 return __a * __aarch64_vget_lane_any (__b, __lane);
18789 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18790 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
18792 return __a * __aarch64_vget_lane_any (__b, __lane);
18795 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18796 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
18798 return __a * __aarch64_vget_lane_any (__b, __lane);
18801 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18802 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
18804 return __a * __aarch64_vget_lane_any (__b, __lane);
18807 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18808 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
18810 return __a * __aarch64_vget_lane_any (__b, __lane);
18813 /* vmul_n */
18815 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18816 vmul_n_f64 (float64x1_t __a, float64_t __b)
18818 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
18821 /* vmulq_lane */
18823 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18824 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
18826 return __a * __aarch64_vget_lane_any (__b, __lane);
18829 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18830 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
18832 __AARCH64_LANE_CHECK (__a, __lane);
18833 return __a * __b[0];
18836 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18837 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
18839 return __a * __aarch64_vget_lane_any (__b, __lane);
18842 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18843 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
18845 return __a * __aarch64_vget_lane_any (__b, __lane);
18848 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18849 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
18851 return __a * __aarch64_vget_lane_any (__b, __lane);
18854 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18855 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
18857 return __a * __aarch64_vget_lane_any (__b, __lane);
18860 /* vmulq_laneq */
18862 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18863 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
18865 return __a * __aarch64_vget_lane_any (__b, __lane);
18868 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18869 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
18871 return __a * __aarch64_vget_lane_any (__b, __lane);
18874 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18875 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
18877 return __a * __aarch64_vget_lane_any (__b, __lane);
18880 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18881 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
18883 return __a * __aarch64_vget_lane_any (__b, __lane);
18886 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18887 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
18889 return __a * __aarch64_vget_lane_any (__b, __lane);
18892 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18893 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
18895 return __a * __aarch64_vget_lane_any (__b, __lane);
18898 /* vneg */
18900 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18901 vneg_f32 (float32x2_t __a)
18903 return -__a;
18906 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18907 vneg_f64 (float64x1_t __a)
18909 return -__a;
18912 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18913 vneg_s8 (int8x8_t __a)
18915 return -__a;
18918 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18919 vneg_s16 (int16x4_t __a)
18921 return -__a;
18924 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18925 vneg_s32 (int32x2_t __a)
18927 return -__a;
18930 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18931 vneg_s64 (int64x1_t __a)
18933 return -__a;
18936 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18937 vnegq_f32 (float32x4_t __a)
18939 return -__a;
18942 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18943 vnegq_f64 (float64x2_t __a)
18945 return -__a;
18948 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18949 vnegq_s8 (int8x16_t __a)
18951 return -__a;
18954 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18955 vnegq_s16 (int16x8_t __a)
18957 return -__a;
18960 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18961 vnegq_s32 (int32x4_t __a)
18963 return -__a;
18966 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18967 vnegq_s64 (int64x2_t __a)
18969 return -__a;
18972 /* vpadd */
18974 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18975 vpadd_s8 (int8x8_t __a, int8x8_t __b)
18977 return __builtin_aarch64_addpv8qi (__a, __b);
18980 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18981 vpadd_s16 (int16x4_t __a, int16x4_t __b)
18983 return __builtin_aarch64_addpv4hi (__a, __b);
18986 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18987 vpadd_s32 (int32x2_t __a, int32x2_t __b)
18989 return __builtin_aarch64_addpv2si (__a, __b);
18992 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18993 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
18995 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
18996 (int8x8_t) __b);
18999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19000 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19002 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19003 (int16x4_t) __b);
19006 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19007 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19009 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19010 (int32x2_t) __b);
19013 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19014 vpaddd_f64 (float64x2_t __a)
19016 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
19019 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19020 vpaddd_s64 (int64x2_t __a)
19022 return __builtin_aarch64_addpdi (__a);
19025 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19026 vpaddd_u64 (uint64x2_t __a)
19028 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19031 /* vqabs */
19033 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19034 vqabsq_s64 (int64x2_t __a)
19036 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19039 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19040 vqabsb_s8 (int8_t __a)
19042 return (int8_t) __builtin_aarch64_sqabsqi (__a);
19045 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19046 vqabsh_s16 (int16_t __a)
19048 return (int16_t) __builtin_aarch64_sqabshi (__a);
19051 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19052 vqabss_s32 (int32_t __a)
19054 return (int32_t) __builtin_aarch64_sqabssi (__a);
19057 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19058 vqabsd_s64 (int64_t __a)
19060 return __builtin_aarch64_sqabsdi (__a);
19063 /* vqadd */
19065 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19066 vqaddb_s8 (int8_t __a, int8_t __b)
19068 return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
19071 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19072 vqaddh_s16 (int16_t __a, int16_t __b)
19074 return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
19077 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19078 vqadds_s32 (int32_t __a, int32_t __b)
19080 return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
19083 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19084 vqaddd_s64 (int64_t __a, int64_t __b)
19086 return __builtin_aarch64_sqadddi (__a, __b);
19089 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19090 vqaddb_u8 (uint8_t __a, uint8_t __b)
19092 return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19095 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19096 vqaddh_u16 (uint16_t __a, uint16_t __b)
19098 return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19101 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19102 vqadds_u32 (uint32_t __a, uint32_t __b)
19104 return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19107 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19108 vqaddd_u64 (uint64_t __a, uint64_t __b)
19110 return __builtin_aarch64_uqadddi_uuu (__a, __b);
19113 /* vqdmlal */
19115 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19116 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19118 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
19121 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19122 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19124 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
19127 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19128 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19129 int const __d)
19131 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
19134 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19135 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19136 int const __d)
19138 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
19141 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19142 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19144 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
19147 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19148 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19150 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
19153 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19154 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19156 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
19159 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19160 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19162 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
19165 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19166 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19168 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
19171 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19172 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19174 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
19177 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19178 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19179 int const __d)
19181 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
19184 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19185 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19186 int const __d)
19188 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
19191 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19192 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19194 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
19197 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19198 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19200 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
19203 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19204 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19206 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
19209 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19210 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19212 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
19215 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19216 vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
19218 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
19221 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19222 vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19224 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
19227 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19228 vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19230 return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
19233 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19234 vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
19236 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
19239 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19240 vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19242 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
19245 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19246 vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19248 return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
19251 /* vqdmlsl */
19253 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19254 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19256 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
19259 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19260 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19262 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
19265 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19266 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19267 int const __d)
19269 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
19272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19273 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19274 int const __d)
19276 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
19279 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19280 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19282 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
19285 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19286 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19288 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
19291 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19292 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19294 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
19297 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19298 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19300 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
19303 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19304 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19306 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
19309 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19310 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19312 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
19315 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19316 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19317 int const __d)
19319 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
19322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19323 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19324 int const __d)
19326 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
19329 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19330 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19332 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
19335 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19336 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19338 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
19341 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19342 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19344 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
19347 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19348 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19350 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
19353 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19354 vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
19356 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
19359 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19360 vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19362 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
19365 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19366 vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19368 return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
19371 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19372 vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
19374 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
19377 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19378 vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19380 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
19383 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19384 vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19386 return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
19389 /* vqdmulh */
19391 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19392 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19394 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
19397 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19398 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19400 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
19403 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19404 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19406 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
19409 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19410 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19412 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
19415 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19416 vqdmulhh_s16 (int16_t __a, int16_t __b)
19418 return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
19421 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19422 vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19424 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
19427 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19428 vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19430 return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
19433 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19434 vqdmulhs_s32 (int32_t __a, int32_t __b)
19436 return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
19439 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19440 vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19442 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
19445 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19446 vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19448 return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
19451 /* vqdmull */
19453 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19454 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
19456 return __builtin_aarch64_sqdmullv4hi (__a, __b);
19459 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19460 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
19462 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
19465 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19466 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
19468 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
19471 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19472 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
19474 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
19477 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19478 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
19480 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
19483 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19484 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
19486 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
19489 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19490 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
19492 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
19495 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19496 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
19498 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
19501 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19502 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
19504 return __builtin_aarch64_sqdmullv2si (__a, __b);
19507 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19508 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
19510 return __builtin_aarch64_sqdmull2v4si (__a, __b);
19513 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19514 vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
19516 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
19519 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19520 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
19522 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
19525 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19526 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
19528 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
19531 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19532 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
19534 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
19537 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19538 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
19540 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
19543 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19544 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
19546 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
19549 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19550 vqdmullh_s16 (int16_t __a, int16_t __b)
19552 return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
19555 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19556 vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19558 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
19561 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19562 vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19564 return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
19567 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19568 vqdmulls_s32 (int32_t __a, int32_t __b)
19570 return __builtin_aarch64_sqdmullsi (__a, __b);
19573 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19574 vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19576 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
19579 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19580 vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19582 return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
19585 /* vqmovn */
19587 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19588 vqmovn_s16 (int16x8_t __a)
19590 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
19593 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19594 vqmovn_s32 (int32x4_t __a)
19596 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
19599 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19600 vqmovn_s64 (int64x2_t __a)
19602 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
19605 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19606 vqmovn_u16 (uint16x8_t __a)
19608 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
19611 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19612 vqmovn_u32 (uint32x4_t __a)
19614 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
19617 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19618 vqmovn_u64 (uint64x2_t __a)
19620 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
19623 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19624 vqmovnh_s16 (int16_t __a)
19626 return (int8_t) __builtin_aarch64_sqmovnhi (__a);
19629 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19630 vqmovns_s32 (int32_t __a)
19632 return (int16_t) __builtin_aarch64_sqmovnsi (__a);
19635 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19636 vqmovnd_s64 (int64_t __a)
19638 return (int32_t) __builtin_aarch64_sqmovndi (__a);
19641 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19642 vqmovnh_u16 (uint16_t __a)
19644 return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
19647 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19648 vqmovns_u32 (uint32_t __a)
19650 return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
19653 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19654 vqmovnd_u64 (uint64_t __a)
19656 return (uint32_t) __builtin_aarch64_uqmovndi (__a);
19659 /* vqmovun */
19661 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19662 vqmovun_s16 (int16x8_t __a)
19664 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
19667 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19668 vqmovun_s32 (int32x4_t __a)
19670 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
19673 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19674 vqmovun_s64 (int64x2_t __a)
19676 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
19679 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19680 vqmovunh_s16 (int16_t __a)
19682 return (int8_t) __builtin_aarch64_sqmovunhi (__a);
19685 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19686 vqmovuns_s32 (int32_t __a)
19688 return (int16_t) __builtin_aarch64_sqmovunsi (__a);
19691 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19692 vqmovund_s64 (int64_t __a)
19694 return (int32_t) __builtin_aarch64_sqmovundi (__a);
19697 /* vqneg */
19699 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19700 vqnegq_s64 (int64x2_t __a)
19702 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
19705 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19706 vqnegb_s8 (int8_t __a)
19708 return (int8_t) __builtin_aarch64_sqnegqi (__a);
19711 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19712 vqnegh_s16 (int16_t __a)
19714 return (int16_t) __builtin_aarch64_sqneghi (__a);
19717 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19718 vqnegs_s32 (int32_t __a)
19720 return (int32_t) __builtin_aarch64_sqnegsi (__a);
19723 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19724 vqnegd_s64 (int64_t __a)
19726 return __builtin_aarch64_sqnegdi (__a);
19729 /* vqrdmulh */
19731 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19732 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19734 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
19737 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19738 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19740 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
19743 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19744 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19746 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
19749 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19750 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19752 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
19755 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19756 vqrdmulhh_s16 (int16_t __a, int16_t __b)
19758 return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
19761 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19762 vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19764 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
19767 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19768 vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19770 return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
19773 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19774 vqrdmulhs_s32 (int32_t __a, int32_t __b)
19776 return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
19779 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19780 vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19782 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
19785 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19786 vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19788 return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
19791 /* vqrshl */
19793 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19794 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
19796 return __builtin_aarch64_sqrshlv8qi (__a, __b);
19799 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19800 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
19802 return __builtin_aarch64_sqrshlv4hi (__a, __b);
19805 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19806 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
19808 return __builtin_aarch64_sqrshlv2si (__a, __b);
19811 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19812 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
19814 return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
19817 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19818 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
19820 return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
19823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19824 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
19826 return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
19829 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19830 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
19832 return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
19835 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19836 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
19838 return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
19841 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19842 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
19844 return __builtin_aarch64_sqrshlv16qi (__a, __b);
19847 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19848 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
19850 return __builtin_aarch64_sqrshlv8hi (__a, __b);
19853 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19854 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
19856 return __builtin_aarch64_sqrshlv4si (__a, __b);
19859 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19860 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
19862 return __builtin_aarch64_sqrshlv2di (__a, __b);
19865 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19866 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
19868 return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
19871 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19872 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
19874 return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
19877 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19878 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
19880 return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
19883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19884 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
19886 return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
19889 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19890 vqrshlb_s8 (int8_t __a, int8_t __b)
19892 return __builtin_aarch64_sqrshlqi (__a, __b);
19895 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19896 vqrshlh_s16 (int16_t __a, int16_t __b)
19898 return __builtin_aarch64_sqrshlhi (__a, __b);
19901 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19902 vqrshls_s32 (int32_t __a, int32_t __b)
19904 return __builtin_aarch64_sqrshlsi (__a, __b);
19907 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19908 vqrshld_s64 (int64_t __a, int64_t __b)
19910 return __builtin_aarch64_sqrshldi (__a, __b);
19913 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19914 vqrshlb_u8 (uint8_t __a, uint8_t __b)
19916 return __builtin_aarch64_uqrshlqi_uus (__a, __b);
19919 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19920 vqrshlh_u16 (uint16_t __a, uint16_t __b)
19922 return __builtin_aarch64_uqrshlhi_uus (__a, __b);
19925 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19926 vqrshls_u32 (uint32_t __a, uint32_t __b)
19928 return __builtin_aarch64_uqrshlsi_uus (__a, __b);
19931 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19932 vqrshld_u64 (uint64_t __a, uint64_t __b)
19934 return __builtin_aarch64_uqrshldi_uus (__a, __b);
19937 /* vqrshrn */
19939 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19940 vqrshrn_n_s16 (int16x8_t __a, const int __b)
19942 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
19945 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19946 vqrshrn_n_s32 (int32x4_t __a, const int __b)
19948 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
19951 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19952 vqrshrn_n_s64 (int64x2_t __a, const int __b)
19954 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
19957 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19958 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
19960 return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
19963 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19964 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
19966 return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
19969 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19970 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
19972 return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
19975 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19976 vqrshrnh_n_s16 (int16_t __a, const int __b)
19978 return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
19981 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19982 vqrshrns_n_s32 (int32_t __a, const int __b)
19984 return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
19987 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19988 vqrshrnd_n_s64 (int64_t __a, const int __b)
19990 return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
19993 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19994 vqrshrnh_n_u16 (uint16_t __a, const int __b)
19996 return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
19999 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20000 vqrshrns_n_u32 (uint32_t __a, const int __b)
20002 return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
20005 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20006 vqrshrnd_n_u64 (uint64_t __a, const int __b)
20008 return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
20011 /* vqrshrun */
20013 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20014 vqrshrun_n_s16 (int16x8_t __a, const int __b)
20016 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
20019 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20020 vqrshrun_n_s32 (int32x4_t __a, const int __b)
20022 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
20025 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20026 vqrshrun_n_s64 (int64x2_t __a, const int __b)
20028 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
20031 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20032 vqrshrunh_n_s16 (int16_t __a, const int __b)
20034 return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
20037 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20038 vqrshruns_n_s32 (int32_t __a, const int __b)
20040 return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
20043 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20044 vqrshrund_n_s64 (int64_t __a, const int __b)
20046 return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
20049 /* vqshl */
20051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20052 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20054 return __builtin_aarch64_sqshlv8qi (__a, __b);
20057 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20058 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20060 return __builtin_aarch64_sqshlv4hi (__a, __b);
20063 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20064 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20066 return __builtin_aarch64_sqshlv2si (__a, __b);
20069 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20070 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20072 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20075 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20076 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20078 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20081 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20082 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20084 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20087 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20088 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20090 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20093 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20094 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20096 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20099 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20100 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20102 return __builtin_aarch64_sqshlv16qi (__a, __b);
20105 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20106 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20108 return __builtin_aarch64_sqshlv8hi (__a, __b);
20111 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20112 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20114 return __builtin_aarch64_sqshlv4si (__a, __b);
20117 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20118 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20120 return __builtin_aarch64_sqshlv2di (__a, __b);
20123 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20124 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20126 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20129 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20130 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20132 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20135 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20136 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20138 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20141 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20142 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20144 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20147 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20148 vqshlb_s8 (int8_t __a, int8_t __b)
20150 return __builtin_aarch64_sqshlqi (__a, __b);
20153 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20154 vqshlh_s16 (int16_t __a, int16_t __b)
20156 return __builtin_aarch64_sqshlhi (__a, __b);
20159 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20160 vqshls_s32 (int32_t __a, int32_t __b)
20162 return __builtin_aarch64_sqshlsi (__a, __b);
20165 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20166 vqshld_s64 (int64_t __a, int64_t __b)
20168 return __builtin_aarch64_sqshldi (__a, __b);
20171 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20172 vqshlb_u8 (uint8_t __a, uint8_t __b)
20174 return __builtin_aarch64_uqshlqi_uus (__a, __b);
20177 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20178 vqshlh_u16 (uint16_t __a, uint16_t __b)
20180 return __builtin_aarch64_uqshlhi_uus (__a, __b);
20183 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20184 vqshls_u32 (uint32_t __a, uint32_t __b)
20186 return __builtin_aarch64_uqshlsi_uus (__a, __b);
20189 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20190 vqshld_u64 (uint64_t __a, uint64_t __b)
20192 return __builtin_aarch64_uqshldi_uus (__a, __b);
20195 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20196 vqshl_n_s8 (int8x8_t __a, const int __b)
20198 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20201 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20202 vqshl_n_s16 (int16x4_t __a, const int __b)
20204 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20207 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20208 vqshl_n_s32 (int32x2_t __a, const int __b)
20210 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20213 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20214 vqshl_n_s64 (int64x1_t __a, const int __b)
20216 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20219 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20220 vqshl_n_u8 (uint8x8_t __a, const int __b)
20222 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20225 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20226 vqshl_n_u16 (uint16x4_t __a, const int __b)
20228 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20231 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20232 vqshl_n_u32 (uint32x2_t __a, const int __b)
20234 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
20237 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20238 vqshl_n_u64 (uint64x1_t __a, const int __b)
20240 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
20243 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20244 vqshlq_n_s8 (int8x16_t __a, const int __b)
20246 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
20249 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20250 vqshlq_n_s16 (int16x8_t __a, const int __b)
20252 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
20255 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20256 vqshlq_n_s32 (int32x4_t __a, const int __b)
20258 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
20261 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20262 vqshlq_n_s64 (int64x2_t __a, const int __b)
20264 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
20267 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20268 vqshlq_n_u8 (uint8x16_t __a, const int __b)
20270 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
20273 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20274 vqshlq_n_u16 (uint16x8_t __a, const int __b)
20276 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
20279 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20280 vqshlq_n_u32 (uint32x4_t __a, const int __b)
20282 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
20285 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20286 vqshlq_n_u64 (uint64x2_t __a, const int __b)
20288 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
20291 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20292 vqshlb_n_s8 (int8_t __a, const int __b)
20294 return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
20297 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20298 vqshlh_n_s16 (int16_t __a, const int __b)
20300 return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
20303 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20304 vqshls_n_s32 (int32_t __a, const int __b)
20306 return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
20309 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20310 vqshld_n_s64 (int64_t __a, const int __b)
20312 return __builtin_aarch64_sqshl_ndi (__a, __b);
20315 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20316 vqshlb_n_u8 (uint8_t __a, const int __b)
20318 return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
20321 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20322 vqshlh_n_u16 (uint16_t __a, const int __b)
20324 return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
20327 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20328 vqshls_n_u32 (uint32_t __a, const int __b)
20330 return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
20333 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20334 vqshld_n_u64 (uint64_t __a, const int __b)
20336 return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
20339 /* vqshlu */
20341 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20342 vqshlu_n_s8 (int8x8_t __a, const int __b)
20344 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
20347 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20348 vqshlu_n_s16 (int16x4_t __a, const int __b)
20350 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
20353 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20354 vqshlu_n_s32 (int32x2_t __a, const int __b)
20356 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
20359 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20360 vqshlu_n_s64 (int64x1_t __a, const int __b)
20362 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
20365 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20366 vqshluq_n_s8 (int8x16_t __a, const int __b)
20368 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
20371 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20372 vqshluq_n_s16 (int16x8_t __a, const int __b)
20374 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
20377 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20378 vqshluq_n_s32 (int32x4_t __a, const int __b)
20380 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
20383 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20384 vqshluq_n_s64 (int64x2_t __a, const int __b)
20386 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
20389 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20390 vqshlub_n_s8 (int8_t __a, const int __b)
20392 return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
20395 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20396 vqshluh_n_s16 (int16_t __a, const int __b)
20398 return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
20401 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20402 vqshlus_n_s32 (int32_t __a, const int __b)
20404 return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
20407 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20408 vqshlud_n_s64 (int64_t __a, const int __b)
20410 return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
20413 /* vqshrn */
/* vqshrn: saturating shift right narrow (immediate).
   Each lane of __a is shifted right by __b and narrowed with
   saturation to half the element width.  Signed variants call the
   sqshrn builtins and cast the result back to the narrow signed
   vector/scalar type; unsigned variants call the _uus-suffixed uqshrn
   builtins, which already return the unsigned type.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
}

/* Scalar (single-element) forms; the builtin suffix (hi/si/di) names
   the machine mode of the wide input operand.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
}
20487 /* vqshrun */
/* vqshrun: signed saturating shift right unsigned narrow (immediate).
   Lanes of the signed input are shifted right by __b and narrowed with
   saturation into the unsigned half-width range; the builtins' signed
   results are cast to the unsigned vector types ACLE requires.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}
20507 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20508 vqshrunh_n_s16 (int16_t __a, const int __b)
20510 return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
20513 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20514 vqshruns_n_s32 (int32_t __a, const int __b)
20516 return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
20519 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20520 vqshrund_n_s64 (int64_t __a, const int __b)
20522 return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
20525 /* vqsub */
/* Scalar vqsub forms: saturating subtraction of two scalar values.
   Signed variants use the sqsub builtins; unsigned variants use the
   _uuu-suffixed uqsub builtins, which take and return unsigned.  */
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
20575 /* vqtbl2 */
/* vqtbl2: per-byte table lookup over a 2 x 128-bit table.
   tab.val[0] and tab.val[1] are packed into slots 0 and 1 of an
   OImode register tuple (__builtin_aarch64_simd_oi) and the tbl3
   builtin performs the lookup using idx.  Unsigned/poly variants
   reuse the signed builtin via casts.
   NOTE(review): parameters tab/idx are not __-prefixed, so a user
   macro of the same name could break this header — consider renaming
   to the reserved form used elsewhere in the file.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}
20631 /* vqtbl3 */
/* vqtbl3: per-byte table lookup over a 3 x 128-bit table.
   The three table registers are packed into a CImode tuple
   (__builtin_aarch64_simd_ci), slots 0..2, then the qtbl3 builtin
   performs the lookup using idx.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}
20693 /* vqtbl4 */
/* vqtbl4: per-byte table lookup over a 4 x 128-bit table.
   The four table registers are packed into an XImode tuple
   (__builtin_aarch64_simd_xi), slots 0..3, then the qtbl4 builtin
   performs the lookup using idx.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
}
20762 /* vqtbx2 */
/* vqtbx2: table lookup with extension (A64 TBX) over a 2 x 128-bit
   table.  As vqtbl2, but r supplies the destination vector: per the
   TBX instruction, lanes whose index is out of range keep the
   corresponding lane of r rather than becoming zero.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
						(int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
						(int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
						  (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
						  (int8x16_t)idx);
}
20821 /* vqtbx3 */
/* vqtbx3: table lookup with extension (A64 TBX) over a 3 x 128-bit
   table; r supplies the lanes kept for out-of-range indices.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
  return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
  return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}
20886 /* vqtbx4 */
/* vqtbx4: table lookup with extension (A64 TBX) over a 4 x 128-bit
   table; r supplies the lanes kept for out-of-range indices.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
  return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
						 (int8x8_t)idx);
}

/* Q-register forms: 16-byte index vector, 16-byte result.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
  return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
  return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
						   (int8x16_t)idx);
}
20958 /* vrbit */
/* vrbit: reverse the bit order within each byte.
   All variants funnel through the signed-byte rbit builtins; the
   poly and unsigned forms cast in and out.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrbit_p8 (poly8x8_t __a)
{
  return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t __a)
{
  return __builtin_aarch64_rbitv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrbitq_p8 (poly8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_rbitv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
}
20996 /* vrecpe */
/* vrecpe: reciprocal estimate.
   The u32 forms use the unsigned-integer estimate builtins (urecpe);
   the scalar (s/d) and vector floating-point forms use frecpe.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t __a)
{
  return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}
21040 /* vrecps */
/* vrecps: floating-point reciprocal step, used to refine estimates
   produced by vrecpe (Newton-Raphson style iteration).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}
21072 /* vrecpx */
/* vrecpx: floating-point reciprocal exponent (scalar only).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}
21087 /* vrev */
21089 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21090 vrev16_p8 (poly8x8_t a)
21092 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21096 vrev16_s8 (int8x8_t a)
21098 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21101 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21102 vrev16_u8 (uint8x8_t a)
21104 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21107 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21108 vrev16q_p8 (poly8x16_t a)
21110 return __builtin_shuffle (a,
21111 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21114 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21115 vrev16q_s8 (int8x16_t a)
21117 return __builtin_shuffle (a,
21118 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21121 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21122 vrev16q_u8 (uint8x16_t a)
21124 return __builtin_shuffle (a,
21125 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21128 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21129 vrev32_p8 (poly8x8_t a)
21131 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21134 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21135 vrev32_p16 (poly16x4_t a)
21137 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21140 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21141 vrev32_s8 (int8x8_t a)
21143 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21146 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21147 vrev32_s16 (int16x4_t a)
21149 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21152 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21153 vrev32_u8 (uint8x8_t a)
21155 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21158 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21159 vrev32_u16 (uint16x4_t a)
21161 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21164 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21165 vrev32q_p8 (poly8x16_t a)
21167 return __builtin_shuffle (a,
21168 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21171 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21172 vrev32q_p16 (poly16x8_t a)
21174 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21177 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21178 vrev32q_s8 (int8x16_t a)
21180 return __builtin_shuffle (a,
21181 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21184 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21185 vrev32q_s16 (int16x8_t a)
21187 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21190 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21191 vrev32q_u8 (uint8x16_t a)
21193 return __builtin_shuffle (a,
21194 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21197 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21198 vrev32q_u16 (uint16x8_t a)
21200 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21203 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21204 vrev64_f32 (float32x2_t a)
21206 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21209 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21210 vrev64_p8 (poly8x8_t a)
21212 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21215 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21216 vrev64_p16 (poly16x4_t a)
21218 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21221 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21222 vrev64_s8 (int8x8_t a)
21224 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21227 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21228 vrev64_s16 (int16x4_t a)
21230 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21233 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21234 vrev64_s32 (int32x2_t a)
21236 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21239 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21240 vrev64_u8 (uint8x8_t a)
21242 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21245 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21246 vrev64_u16 (uint16x4_t a)
21248 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21251 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21252 vrev64_u32 (uint32x2_t a)
21254 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21257 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21258 vrev64q_f32 (float32x4_t a)
21260 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21263 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21264 vrev64q_p8 (poly8x16_t a)
21266 return __builtin_shuffle (a,
21267 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21270 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21271 vrev64q_p16 (poly16x8_t a)
21273 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21276 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21277 vrev64q_s8 (int8x16_t a)
21279 return __builtin_shuffle (a,
21280 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21284 vrev64q_s16 (int16x8_t a)
21286 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21289 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21290 vrev64q_s32 (int32x4_t a)
21292 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21295 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21296 vrev64q_u8 (uint8x16_t a)
21298 return __builtin_shuffle (a,
21299 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21302 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21303 vrev64q_u16 (uint16x8_t a)
21305 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21308 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21309 vrev64q_u32 (uint32x4_t a)
21311 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21314 /* vrnd */
21316 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21317 vrnd_f32 (float32x2_t __a)
21319 return __builtin_aarch64_btruncv2sf (__a);
21322 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21323 vrnd_f64 (float64x1_t __a)
21325 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21328 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21329 vrndq_f32 (float32x4_t __a)
21331 return __builtin_aarch64_btruncv4sf (__a);
21334 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21335 vrndq_f64 (float64x2_t __a)
21337 return __builtin_aarch64_btruncv2df (__a);
21340 /* vrnda */
21342 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21343 vrnda_f32 (float32x2_t __a)
21345 return __builtin_aarch64_roundv2sf (__a);
21348 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21349 vrnda_f64 (float64x1_t __a)
21351 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21354 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21355 vrndaq_f32 (float32x4_t __a)
21357 return __builtin_aarch64_roundv4sf (__a);
21360 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21361 vrndaq_f64 (float64x2_t __a)
21363 return __builtin_aarch64_roundv2df (__a);
21366 /* vrndi */
21368 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21369 vrndi_f32 (float32x2_t __a)
21371 return __builtin_aarch64_nearbyintv2sf (__a);
21374 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21375 vrndi_f64 (float64x1_t __a)
21377 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21380 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21381 vrndiq_f32 (float32x4_t __a)
21383 return __builtin_aarch64_nearbyintv4sf (__a);
21386 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21387 vrndiq_f64 (float64x2_t __a)
21389 return __builtin_aarch64_nearbyintv2df (__a);
21392 /* vrndm */
21394 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21395 vrndm_f32 (float32x2_t __a)
21397 return __builtin_aarch64_floorv2sf (__a);
21400 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21401 vrndm_f64 (float64x1_t __a)
21403 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21406 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21407 vrndmq_f32 (float32x4_t __a)
21409 return __builtin_aarch64_floorv4sf (__a);
21412 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21413 vrndmq_f64 (float64x2_t __a)
21415 return __builtin_aarch64_floorv2df (__a);
21418 /* vrndn */
21420 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21421 vrndn_f32 (float32x2_t __a)
21423 return __builtin_aarch64_frintnv2sf (__a);
21426 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21427 vrndn_f64 (float64x1_t __a)
21429 return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
21432 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21433 vrndnq_f32 (float32x4_t __a)
21435 return __builtin_aarch64_frintnv4sf (__a);
21438 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21439 vrndnq_f64 (float64x2_t __a)
21441 return __builtin_aarch64_frintnv2df (__a);
21444 /* vrndp */
21446 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21447 vrndp_f32 (float32x2_t __a)
21449 return __builtin_aarch64_ceilv2sf (__a);
21452 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21453 vrndp_f64 (float64x1_t __a)
21455 return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
21458 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21459 vrndpq_f32 (float32x4_t __a)
21461 return __builtin_aarch64_ceilv4sf (__a);
21464 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21465 vrndpq_f64 (float64x2_t __a)
21467 return __builtin_aarch64_ceilv2df (__a);
21470 /* vrndx */
21472 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21473 vrndx_f32 (float32x2_t __a)
21475 return __builtin_aarch64_rintv2sf (__a);
21478 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21479 vrndx_f64 (float64x1_t __a)
21481 return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
21484 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21485 vrndxq_f32 (float32x4_t __a)
21487 return __builtin_aarch64_rintv4sf (__a);
21490 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21491 vrndxq_f64 (float64x2_t __a)
21493 return __builtin_aarch64_rintv2df (__a);
21496 /* vrshl */
21498 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21499 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21501 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21504 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21505 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21507 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21510 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21511 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21513 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21516 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21517 vrshl_s64 (int64x1_t __a, int64x1_t __b)
21519 return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
21522 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21523 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
21525 return __builtin_aarch64_urshlv8qi_uus (__a, __b);
21528 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21529 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
21531 return __builtin_aarch64_urshlv4hi_uus (__a, __b);
21534 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21535 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
21537 return __builtin_aarch64_urshlv2si_uus (__a, __b);
21540 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21541 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
21543 return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
21546 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21547 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
21549 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
21552 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21553 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
21555 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
21558 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21559 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
21561 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
21564 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21565 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
21567 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
21570 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21571 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21573 return __builtin_aarch64_urshlv16qi_uus (__a, __b);
21576 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21577 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21579 return __builtin_aarch64_urshlv8hi_uus (__a, __b);
21582 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21583 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21585 return __builtin_aarch64_urshlv4si_uus (__a, __b);
21588 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21589 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21591 return __builtin_aarch64_urshlv2di_uus (__a, __b);
21594 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21595 vrshld_s64 (int64_t __a, int64_t __b)
21597 return __builtin_aarch64_srshldi (__a, __b);
21600 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21601 vrshld_u64 (uint64_t __a, int64_t __b)
21603 return __builtin_aarch64_urshldi_uus (__a, __b);
21606 /* vrshr */
21608 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21609 vrshr_n_s8 (int8x8_t __a, const int __b)
21611 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
21614 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21615 vrshr_n_s16 (int16x4_t __a, const int __b)
21617 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
21620 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21621 vrshr_n_s32 (int32x2_t __a, const int __b)
21623 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
21626 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21627 vrshr_n_s64 (int64x1_t __a, const int __b)
21629 return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
21632 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21633 vrshr_n_u8 (uint8x8_t __a, const int __b)
21635 return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
21638 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21639 vrshr_n_u16 (uint16x4_t __a, const int __b)
21641 return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
21644 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21645 vrshr_n_u32 (uint32x2_t __a, const int __b)
21647 return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
21650 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21651 vrshr_n_u64 (uint64x1_t __a, const int __b)
21653 return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
21656 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21657 vrshrq_n_s8 (int8x16_t __a, const int __b)
21659 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
21662 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21663 vrshrq_n_s16 (int16x8_t __a, const int __b)
21665 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
21668 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21669 vrshrq_n_s32 (int32x4_t __a, const int __b)
21671 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
21674 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21675 vrshrq_n_s64 (int64x2_t __a, const int __b)
21677 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
21680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21681 vrshrq_n_u8 (uint8x16_t __a, const int __b)
21683 return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
21686 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21687 vrshrq_n_u16 (uint16x8_t __a, const int __b)
21689 return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
21692 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21693 vrshrq_n_u32 (uint32x4_t __a, const int __b)
21695 return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
21698 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21699 vrshrq_n_u64 (uint64x2_t __a, const int __b)
21701 return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
21704 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21705 vrshrd_n_s64 (int64_t __a, const int __b)
21707 return __builtin_aarch64_srshr_ndi (__a, __b);
21710 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21711 vrshrd_n_u64 (uint64_t __a, const int __b)
21713 return __builtin_aarch64_urshr_ndi_uus (__a, __b);
21716 /* vrsra */
21718 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21719 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
21721 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
21724 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21725 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21727 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
21730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21731 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21733 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
21736 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21737 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
21739 return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
21742 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21743 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
21745 return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
21748 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21749 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
21751 return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
21754 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21755 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
21757 return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
21760 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21761 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
21763 return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
21766 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21767 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
21769 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
21772 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21773 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
21775 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
21778 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21779 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
21781 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
21784 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21785 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
21787 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
21790 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21791 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
21793 return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
21796 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21797 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
21799 return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
21802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21803 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
21805 return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
21808 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21809 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
21811 return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
21814 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21815 vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
21817 return __builtin_aarch64_srsra_ndi (__a, __b, __c);
21820 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21821 vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
21823 return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
21826 #pragma GCC push_options
21827 #pragma GCC target ("+nothing+crypto")
21829 /* vsha1 */
21831 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21832 vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21834 return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
21837 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21838 vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21840 return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
21843 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21844 vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21846 return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
21849 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21850 vsha1h_u32 (uint32_t hash_e)
21852 return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
21855 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21856 vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
21858 return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
21861 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21862 vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
21864 return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
21867 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21868 vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
21870 return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
21873 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21874 vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
21876 return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
21879 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21880 vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
21882 return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
21885 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21886 vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
21888 return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
21891 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
21892 vmull_p64 (poly64_t a, poly64_t b)
21894 return
21895 __builtin_aarch64_crypto_pmulldi_ppp (a, b);
21898 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
21899 vmull_high_p64 (poly64x2_t a, poly64x2_t b)
21901 return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
21904 #pragma GCC pop_options
21906 /* vshl */
21908 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21909 vshl_n_s8 (int8x8_t __a, const int __b)
21911 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
21914 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21915 vshl_n_s16 (int16x4_t __a, const int __b)
21917 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
21920 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21921 vshl_n_s32 (int32x2_t __a, const int __b)
21923 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
21926 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21927 vshl_n_s64 (int64x1_t __a, const int __b)
21929 return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
21932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21933 vshl_n_u8 (uint8x8_t __a, const int __b)
21935 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
21938 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21939 vshl_n_u16 (uint16x4_t __a, const int __b)
21941 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
21944 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21945 vshl_n_u32 (uint32x2_t __a, const int __b)
21947 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
21950 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21951 vshl_n_u64 (uint64x1_t __a, const int __b)
21953 return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
21956 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21957 vshlq_n_s8 (int8x16_t __a, const int __b)
21959 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
21962 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21963 vshlq_n_s16 (int16x8_t __a, const int __b)
21965 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
21968 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21969 vshlq_n_s32 (int32x4_t __a, const int __b)
21971 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
21974 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21975 vshlq_n_s64 (int64x2_t __a, const int __b)
21977 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
21980 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21981 vshlq_n_u8 (uint8x16_t __a, const int __b)
21983 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
21986 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21987 vshlq_n_u16 (uint16x8_t __a, const int __b)
21989 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
21992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21993 vshlq_n_u32 (uint32x4_t __a, const int __b)
21995 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
21998 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21999 vshlq_n_u64 (uint64x2_t __a, const int __b)
22001 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
22004 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22005 vshld_n_s64 (int64_t __a, const int __b)
22007 return __builtin_aarch64_ashldi (__a, __b);
22010 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22011 vshld_n_u64 (uint64_t __a, const int __b)
22013 return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
22016 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22017 vshl_s8 (int8x8_t __a, int8x8_t __b)
22019 return __builtin_aarch64_sshlv8qi (__a, __b);
22022 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22023 vshl_s16 (int16x4_t __a, int16x4_t __b)
22025 return __builtin_aarch64_sshlv4hi (__a, __b);
22028 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22029 vshl_s32 (int32x2_t __a, int32x2_t __b)
22031 return __builtin_aarch64_sshlv2si (__a, __b);
22034 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22035 vshl_s64 (int64x1_t __a, int64x1_t __b)
22037 return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
22040 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22041 vshl_u8 (uint8x8_t __a, int8x8_t __b)
22043 return __builtin_aarch64_ushlv8qi_uus (__a, __b);
22046 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22047 vshl_u16 (uint16x4_t __a, int16x4_t __b)
22049 return __builtin_aarch64_ushlv4hi_uus (__a, __b);
22052 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22053 vshl_u32 (uint32x2_t __a, int32x2_t __b)
22055 return __builtin_aarch64_ushlv2si_uus (__a, __b);
22058 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22059 vshl_u64 (uint64x1_t __a, int64x1_t __b)
22061 return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
22064 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22065 vshlq_s8 (int8x16_t __a, int8x16_t __b)
22067 return __builtin_aarch64_sshlv16qi (__a, __b);
22070 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22071 vshlq_s16 (int16x8_t __a, int16x8_t __b)
22073 return __builtin_aarch64_sshlv8hi (__a, __b);
22076 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22077 vshlq_s32 (int32x4_t __a, int32x4_t __b)
22079 return __builtin_aarch64_sshlv4si (__a, __b);
22082 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22083 vshlq_s64 (int64x2_t __a, int64x2_t __b)
22085 return __builtin_aarch64_sshlv2di (__a, __b);
22088 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22089 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
22091 return __builtin_aarch64_ushlv16qi_uus (__a, __b);
22094 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22095 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
22097 return __builtin_aarch64_ushlv8hi_uus (__a, __b);
22100 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22101 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
22103 return __builtin_aarch64_ushlv4si_uus (__a, __b);
22106 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22107 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
22109 return __builtin_aarch64_ushlv2di_uus (__a, __b);
22112 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22113 vshld_s64 (int64_t __a, int64_t __b)
22115 return __builtin_aarch64_sshldi (__a, __b);
22118 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22119 vshld_u64 (uint64_t __a, uint64_t __b)
22121 return __builtin_aarch64_ushldi_uus (__a, __b);
22124 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22125 vshll_high_n_s8 (int8x16_t __a, const int __b)
22127 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
22130 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22131 vshll_high_n_s16 (int16x8_t __a, const int __b)
22133 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
22136 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22137 vshll_high_n_s32 (int32x4_t __a, const int __b)
22139 return __builtin_aarch64_sshll2_nv4si (__a, __b);
22142 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22143 vshll_high_n_u8 (uint8x16_t __a, const int __b)
22145 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
22148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22149 vshll_high_n_u16 (uint16x8_t __a, const int __b)
22151 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
22154 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22155 vshll_high_n_u32 (uint32x4_t __a, const int __b)
22157 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
22160 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22161 vshll_n_s8 (int8x8_t __a, const int __b)
22163 return __builtin_aarch64_sshll_nv8qi (__a, __b);
22166 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22167 vshll_n_s16 (int16x4_t __a, const int __b)
22169 return __builtin_aarch64_sshll_nv4hi (__a, __b);
22172 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22173 vshll_n_s32 (int32x2_t __a, const int __b)
22175 return __builtin_aarch64_sshll_nv2si (__a, __b);
22178 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22179 vshll_n_u8 (uint8x8_t __a, const int __b)
22181 return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
22184 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22185 vshll_n_u16 (uint16x4_t __a, const int __b)
22187 return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
22190 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22191 vshll_n_u32 (uint32x2_t __a, const int __b)
22193 return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
22196 /* vshr */
22198 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22199 vshr_n_s8 (int8x8_t __a, const int __b)
22201 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
22204 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22205 vshr_n_s16 (int16x4_t __a, const int __b)
22207 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
22210 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22211 vshr_n_s32 (int32x2_t __a, const int __b)
22213 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
22216 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22217 vshr_n_s64 (int64x1_t __a, const int __b)
22219 return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
22222 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22223 vshr_n_u8 (uint8x8_t __a, const int __b)
22225 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
22228 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22229 vshr_n_u16 (uint16x4_t __a, const int __b)
22231 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
22234 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22235 vshr_n_u32 (uint32x2_t __a, const int __b)
22237 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
22240 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22241 vshr_n_u64 (uint64x1_t __a, const int __b)
22243 return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
22246 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22247 vshrq_n_s8 (int8x16_t __a, const int __b)
22249 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
22252 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22253 vshrq_n_s16 (int16x8_t __a, const int __b)
22255 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
22258 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22259 vshrq_n_s32 (int32x4_t __a, const int __b)
22261 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
22264 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22265 vshrq_n_s64 (int64x2_t __a, const int __b)
22267 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
22270 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22271 vshrq_n_u8 (uint8x16_t __a, const int __b)
22273 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
22276 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22277 vshrq_n_u16 (uint16x8_t __a, const int __b)
22279 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
22282 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22283 vshrq_n_u32 (uint32x4_t __a, const int __b)
22285 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
22288 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22289 vshrq_n_u64 (uint64x2_t __a, const int __b)
22291 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
22294 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22295 vshrd_n_s64 (int64_t __a, const int __b)
22297 return __builtin_aarch64_ashr_simddi (__a, __b);
22300 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22301 vshrd_n_u64 (uint64_t __a, const int __b)
22303 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
22306 /* vsli */
22308 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22309 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22311 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
22314 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22315 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22317 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
22320 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22321 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22323 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
22326 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22327 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22329 return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
22332 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22333 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22335 return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
22338 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22339 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22341 return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
22344 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22345 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22347 return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
22350 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22351 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22353 return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
22356 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22357 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22359 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
22362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22363 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22365 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
22368 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22369 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22371 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
22374 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22375 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22377 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
22380 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22381 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22383 return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
22386 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22387 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22389 return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
22392 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22393 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22395 return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
22398 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22399 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22401 return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
22404 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22405 vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
22407 return __builtin_aarch64_ssli_ndi (__a, __b, __c);
22410 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22411 vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22413 return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
22416 /* vsqadd */
22418 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22419 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22421 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22424 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22425 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22427 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22430 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22431 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22433 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22436 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22437 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22439 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22442 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22443 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22445 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22448 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22449 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22451 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22455 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22457 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22460 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22461 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22463 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22466 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
22467 vsqaddb_u8 (uint8_t __a, int8_t __b)
22469 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22472 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
22473 vsqaddh_u16 (uint16_t __a, int16_t __b)
22475 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22478 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
22479 vsqadds_u32 (uint32_t __a, int32_t __b)
22481 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22484 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22485 vsqaddd_u64 (uint64_t __a, int64_t __b)
22487 return __builtin_aarch64_usqadddi_uus (__a, __b);
22490 /* vsqrt */
22491 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22492 vsqrt_f32 (float32x2_t a)
22494 return __builtin_aarch64_sqrtv2sf (a);
22497 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22498 vsqrtq_f32 (float32x4_t a)
22500 return __builtin_aarch64_sqrtv4sf (a);
22503 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
22504 vsqrt_f64 (float64x1_t a)
22506 return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) };
22509 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22510 vsqrtq_f64 (float64x2_t a)
22512 return __builtin_aarch64_sqrtv2df (a);
22515 /* vsra */
22517 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22518 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22520 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22523 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22524 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22526 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22529 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22530 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22532 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22535 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22536 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22538 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22541 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22542 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22544 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22547 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22548 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22550 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22553 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22554 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22556 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22559 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22560 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22562 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22565 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22566 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22568 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22571 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22572 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22574 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22577 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22578 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22580 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22583 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22584 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22586 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22589 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22590 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22592 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22595 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22596 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22598 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22601 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22602 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22604 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22607 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22608 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22610 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22613 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22614 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22616 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22619 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22620 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22622 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22625 /* vsri */
22627 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22628 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22630 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22633 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22634 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22636 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22639 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22640 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22642 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
22645 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22646 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22648 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
22651 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22652 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22654 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
22657 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22658 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22660 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
22663 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22664 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22666 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
22669 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22670 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22672 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
22675 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22676 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22678 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
22681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22682 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22684 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
22687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22688 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22690 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
22693 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22694 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22696 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
22699 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22700 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22702 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
22705 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22706 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22708 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
22711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22712 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22714 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
22717 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22718 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22720 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
22723 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22724 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
22726 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
22729 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22730 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22732 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
22735 /* vst1 */
22737 __extension__ static __inline void __attribute__ ((__always_inline__))
22738 vst1_f16 (float16_t *__a, float16x4_t __b)
22740 __builtin_aarch64_st1v4hf (__a, __b);
22743 __extension__ static __inline void __attribute__ ((__always_inline__))
22744 vst1_f32 (float32_t *a, float32x2_t b)
22746 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22749 __extension__ static __inline void __attribute__ ((__always_inline__))
22750 vst1_f64 (float64_t *a, float64x1_t b)
22752 *a = b[0];
22755 __extension__ static __inline void __attribute__ ((__always_inline__))
22756 vst1_p8 (poly8_t *a, poly8x8_t b)
22758 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22759 (int8x8_t) b);
22762 __extension__ static __inline void __attribute__ ((__always_inline__))
22763 vst1_p16 (poly16_t *a, poly16x4_t b)
22765 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22766 (int16x4_t) b);
22769 __extension__ static __inline void __attribute__ ((__always_inline__))
22770 vst1_s8 (int8_t *a, int8x8_t b)
22772 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22775 __extension__ static __inline void __attribute__ ((__always_inline__))
22776 vst1_s16 (int16_t *a, int16x4_t b)
22778 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22781 __extension__ static __inline void __attribute__ ((__always_inline__))
22782 vst1_s32 (int32_t *a, int32x2_t b)
22784 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22787 __extension__ static __inline void __attribute__ ((__always_inline__))
22788 vst1_s64 (int64_t *a, int64x1_t b)
22790 *a = b[0];
22793 __extension__ static __inline void __attribute__ ((__always_inline__))
22794 vst1_u8 (uint8_t *a, uint8x8_t b)
22796 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22797 (int8x8_t) b);
22800 __extension__ static __inline void __attribute__ ((__always_inline__))
22801 vst1_u16 (uint16_t *a, uint16x4_t b)
22803 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22804 (int16x4_t) b);
22807 __extension__ static __inline void __attribute__ ((__always_inline__))
22808 vst1_u32 (uint32_t *a, uint32x2_t b)
22810 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22811 (int32x2_t) b);
22814 __extension__ static __inline void __attribute__ ((__always_inline__))
22815 vst1_u64 (uint64_t *a, uint64x1_t b)
22817 *a = b[0];
22820 /* vst1q */
22822 __extension__ static __inline void __attribute__ ((__always_inline__))
22823 vst1q_f16 (float16_t *__a, float16x8_t __b)
22825 __builtin_aarch64_st1v8hf (__a, __b);
22828 __extension__ static __inline void __attribute__ ((__always_inline__))
22829 vst1q_f32 (float32_t *a, float32x4_t b)
22831 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22834 __extension__ static __inline void __attribute__ ((__always_inline__))
22835 vst1q_f64 (float64_t *a, float64x2_t b)
22837 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22840 __extension__ static __inline void __attribute__ ((__always_inline__))
22841 vst1q_p8 (poly8_t *a, poly8x16_t b)
22843 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22844 (int8x16_t) b);
22847 __extension__ static __inline void __attribute__ ((__always_inline__))
22848 vst1q_p16 (poly16_t *a, poly16x8_t b)
22850 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22851 (int16x8_t) b);
22854 __extension__ static __inline void __attribute__ ((__always_inline__))
22855 vst1q_s8 (int8_t *a, int8x16_t b)
22857 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22860 __extension__ static __inline void __attribute__ ((__always_inline__))
22861 vst1q_s16 (int16_t *a, int16x8_t b)
22863 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22866 __extension__ static __inline void __attribute__ ((__always_inline__))
22867 vst1q_s32 (int32_t *a, int32x4_t b)
22869 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22872 __extension__ static __inline void __attribute__ ((__always_inline__))
22873 vst1q_s64 (int64_t *a, int64x2_t b)
22875 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22878 __extension__ static __inline void __attribute__ ((__always_inline__))
22879 vst1q_u8 (uint8_t *a, uint8x16_t b)
22881 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22882 (int8x16_t) b);
22885 __extension__ static __inline void __attribute__ ((__always_inline__))
22886 vst1q_u16 (uint16_t *a, uint16x8_t b)
22888 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22889 (int16x8_t) b);
22892 __extension__ static __inline void __attribute__ ((__always_inline__))
22893 vst1q_u32 (uint32_t *a, uint32x4_t b)
22895 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22896 (int32x4_t) b);
22899 __extension__ static __inline void __attribute__ ((__always_inline__))
22900 vst1q_u64 (uint64_t *a, uint64x2_t b)
22902 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22903 (int64x2_t) b);
22906 /* vst1_lane */
22908 __extension__ static __inline void __attribute__ ((__always_inline__))
22909 vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
22911 *__a = __aarch64_vget_lane_any (__b, __lane);
22914 __extension__ static __inline void __attribute__ ((__always_inline__))
22915 vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
22917 *__a = __aarch64_vget_lane_any (__b, __lane);
22920 __extension__ static __inline void __attribute__ ((__always_inline__))
22921 vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
22923 *__a = __aarch64_vget_lane_any (__b, __lane);
22926 __extension__ static __inline void __attribute__ ((__always_inline__))
22927 vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
22929 *__a = __aarch64_vget_lane_any (__b, __lane);
22932 __extension__ static __inline void __attribute__ ((__always_inline__))
22933 vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
22935 *__a = __aarch64_vget_lane_any (__b, __lane);
22938 __extension__ static __inline void __attribute__ ((__always_inline__))
22939 vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
22941 *__a = __aarch64_vget_lane_any (__b, __lane);
22944 __extension__ static __inline void __attribute__ ((__always_inline__))
22945 vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
22947 *__a = __aarch64_vget_lane_any (__b, __lane);
22950 __extension__ static __inline void __attribute__ ((__always_inline__))
22951 vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
22953 *__a = __aarch64_vget_lane_any (__b, __lane);
22956 __extension__ static __inline void __attribute__ ((__always_inline__))
22957 vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
22959 *__a = __aarch64_vget_lane_any (__b, __lane);
22962 __extension__ static __inline void __attribute__ ((__always_inline__))
22963 vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
22965 *__a = __aarch64_vget_lane_any (__b, __lane);
22968 __extension__ static __inline void __attribute__ ((__always_inline__))
22969 vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
22971 *__a = __aarch64_vget_lane_any (__b, __lane);
22974 __extension__ static __inline void __attribute__ ((__always_inline__))
22975 vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
22977 *__a = __aarch64_vget_lane_any (__b, __lane);
22980 __extension__ static __inline void __attribute__ ((__always_inline__))
22981 vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
22983 *__a = __aarch64_vget_lane_any (__b, __lane);
22986 /* vst1q_lane */
22988 __extension__ static __inline void __attribute__ ((__always_inline__))
22989 vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
22991 *__a = __aarch64_vget_lane_any (__b, __lane);
22994 __extension__ static __inline void __attribute__ ((__always_inline__))
22995 vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
22997 *__a = __aarch64_vget_lane_any (__b, __lane);
23000 __extension__ static __inline void __attribute__ ((__always_inline__))
23001 vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
23003 *__a = __aarch64_vget_lane_any (__b, __lane);
23006 __extension__ static __inline void __attribute__ ((__always_inline__))
23007 vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
23009 *__a = __aarch64_vget_lane_any (__b, __lane);
23012 __extension__ static __inline void __attribute__ ((__always_inline__))
23013 vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
23015 *__a = __aarch64_vget_lane_any (__b, __lane);
23018 __extension__ static __inline void __attribute__ ((__always_inline__))
23019 vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
23021 *__a = __aarch64_vget_lane_any (__b, __lane);
23024 __extension__ static __inline void __attribute__ ((__always_inline__))
23025 vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
23027 *__a = __aarch64_vget_lane_any (__b, __lane);
23030 __extension__ static __inline void __attribute__ ((__always_inline__))
23031 vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
23033 *__a = __aarch64_vget_lane_any (__b, __lane);
23036 __extension__ static __inline void __attribute__ ((__always_inline__))
23037 vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
23039 *__a = __aarch64_vget_lane_any (__b, __lane);
23042 __extension__ static __inline void __attribute__ ((__always_inline__))
23043 vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
23045 *__a = __aarch64_vget_lane_any (__b, __lane);
23048 __extension__ static __inline void __attribute__ ((__always_inline__))
23049 vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
23051 *__a = __aarch64_vget_lane_any (__b, __lane);
23054 __extension__ static __inline void __attribute__ ((__always_inline__))
23055 vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
23057 *__a = __aarch64_vget_lane_any (__b, __lane);
23060 __extension__ static __inline void __attribute__ ((__always_inline__))
23061 vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
23063 *__a = __aarch64_vget_lane_any (__b, __lane);
23066 /* vstn */
23068 __extension__ static __inline void
23069 vst2_s64 (int64_t * __a, int64x1x2_t val)
23071 __builtin_aarch64_simd_oi __o;
23072 int64x2x2_t temp;
23073 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23074 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23075 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23076 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23077 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23080 __extension__ static __inline void
23081 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
23083 __builtin_aarch64_simd_oi __o;
23084 uint64x2x2_t temp;
23085 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23086 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23087 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23088 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23089 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23092 __extension__ static __inline void
23093 vst2_f64 (float64_t * __a, float64x1x2_t val)
23095 __builtin_aarch64_simd_oi __o;
23096 float64x2x2_t temp;
23097 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23098 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23099 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
23100 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
23101 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
23104 __extension__ static __inline void
23105 vst2_s8 (int8_t * __a, int8x8x2_t val)
23107 __builtin_aarch64_simd_oi __o;
23108 int8x16x2_t temp;
23109 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23110 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23111 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23112 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23113 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23116 __extension__ static __inline void __attribute__ ((__always_inline__))
23117 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
23119 __builtin_aarch64_simd_oi __o;
23120 poly8x16x2_t temp;
23121 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23122 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23123 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23124 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23125 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23128 __extension__ static __inline void __attribute__ ((__always_inline__))
23129 vst2_s16 (int16_t * __a, int16x4x2_t val)
23131 __builtin_aarch64_simd_oi __o;
23132 int16x8x2_t temp;
23133 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23134 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23135 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23136 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23137 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23140 __extension__ static __inline void __attribute__ ((__always_inline__))
23141 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
23143 __builtin_aarch64_simd_oi __o;
23144 poly16x8x2_t temp;
23145 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23146 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23147 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23148 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23149 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23152 __extension__ static __inline void __attribute__ ((__always_inline__))
23153 vst2_s32 (int32_t * __a, int32x2x2_t val)
23155 __builtin_aarch64_simd_oi __o;
23156 int32x4x2_t temp;
23157 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23158 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23159 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23160 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23161 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23164 __extension__ static __inline void __attribute__ ((__always_inline__))
23165 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
23167 __builtin_aarch64_simd_oi __o;
23168 uint8x16x2_t temp;
23169 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23170 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23171 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23172 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23173 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23176 __extension__ static __inline void __attribute__ ((__always_inline__))
23177 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
23179 __builtin_aarch64_simd_oi __o;
23180 uint16x8x2_t temp;
23181 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23182 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23183 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23184 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23185 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23188 __extension__ static __inline void __attribute__ ((__always_inline__))
23189 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
23191 __builtin_aarch64_simd_oi __o;
23192 uint32x4x2_t temp;
23193 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23194 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23195 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23196 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23197 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23200 __extension__ static __inline void __attribute__ ((__always_inline__))
23201 vst2_f16 (float16_t * __a, float16x4x2_t val)
23203 __builtin_aarch64_simd_oi __o;
23204 float16x8x2_t temp;
23205 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23206 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23207 __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
23208 __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
23209 __builtin_aarch64_st2v4hf (__a, __o);
23212 __extension__ static __inline void __attribute__ ((__always_inline__))
23213 vst2_f32 (float32_t * __a, float32x2x2_t val)
23215 __builtin_aarch64_simd_oi __o;
23216 float32x4x2_t temp;
23217 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23218 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23219 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
23220 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
23221 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23224 __extension__ static __inline void __attribute__ ((__always_inline__))
23225 vst2q_s8 (int8_t * __a, int8x16x2_t val)
23227 __builtin_aarch64_simd_oi __o;
23228 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23229 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23230 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23233 __extension__ static __inline void __attribute__ ((__always_inline__))
23234 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
23236 __builtin_aarch64_simd_oi __o;
23237 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23238 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23239 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23242 __extension__ static __inline void __attribute__ ((__always_inline__))
23243 vst2q_s16 (int16_t * __a, int16x8x2_t val)
23245 __builtin_aarch64_simd_oi __o;
23246 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23247 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23248 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23251 __extension__ static __inline void __attribute__ ((__always_inline__))
23252 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
23254 __builtin_aarch64_simd_oi __o;
23255 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23256 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23257 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23260 __extension__ static __inline void __attribute__ ((__always_inline__))
23261 vst2q_s32 (int32_t * __a, int32x4x2_t val)
23263 __builtin_aarch64_simd_oi __o;
23264 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23265 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23266 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23269 __extension__ static __inline void __attribute__ ((__always_inline__))
23270 vst2q_s64 (int64_t * __a, int64x2x2_t val)
23272 __builtin_aarch64_simd_oi __o;
23273 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23274 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23275 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23278 __extension__ static __inline void __attribute__ ((__always_inline__))
23279 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
23281 __builtin_aarch64_simd_oi __o;
23282 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23283 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23284 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23287 __extension__ static __inline void __attribute__ ((__always_inline__))
23288 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
23290 __builtin_aarch64_simd_oi __o;
23291 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23292 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23293 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23296 __extension__ static __inline void __attribute__ ((__always_inline__))
23297 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
23299 __builtin_aarch64_simd_oi __o;
23300 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23301 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23302 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23305 __extension__ static __inline void __attribute__ ((__always_inline__))
23306 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
23308 __builtin_aarch64_simd_oi __o;
23309 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23310 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23311 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23314 __extension__ static __inline void __attribute__ ((__always_inline__))
23315 vst2q_f16 (float16_t * __a, float16x8x2_t val)
23317 __builtin_aarch64_simd_oi __o;
23318 __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
23319 __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
23320 __builtin_aarch64_st2v8hf (__a, __o);
23323 __extension__ static __inline void __attribute__ ((__always_inline__))
23324 vst2q_f32 (float32_t * __a, float32x4x2_t val)
23326 __builtin_aarch64_simd_oi __o;
23327 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
23328 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
23329 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23332 __extension__ static __inline void __attribute__ ((__always_inline__))
23333 vst2q_f64 (float64_t * __a, float64x2x2_t val)
23335 __builtin_aarch64_simd_oi __o;
23336 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
23337 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
23338 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
23341 __extension__ static __inline void
23342 vst3_s64 (int64_t * __a, int64x1x3_t val)
23344 __builtin_aarch64_simd_ci __o;
23345 int64x2x3_t temp;
23346 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23347 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23348 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23349 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23350 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23351 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23352 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23355 __extension__ static __inline void
23356 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
23358 __builtin_aarch64_simd_ci __o;
23359 uint64x2x3_t temp;
23360 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23361 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23362 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23363 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23364 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23365 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23366 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23369 __extension__ static __inline void
23370 vst3_f64 (float64_t * __a, float64x1x3_t val)
23372 __builtin_aarch64_simd_ci __o;
23373 float64x2x3_t temp;
23374 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23375 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23376 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23377 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
23378 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
23379 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
23380 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
23383 __extension__ static __inline void
23384 vst3_s8 (int8_t * __a, int8x8x3_t val)
23386 __builtin_aarch64_simd_ci __o;
23387 int8x16x3_t temp;
23388 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23389 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23390 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23391 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23392 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23393 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23394 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23397 __extension__ static __inline void __attribute__ ((__always_inline__))
23398 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
23400 __builtin_aarch64_simd_ci __o;
23401 poly8x16x3_t temp;
23402 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23403 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23404 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23405 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23406 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23407 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23408 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23411 __extension__ static __inline void __attribute__ ((__always_inline__))
23412 vst3_s16 (int16_t * __a, int16x4x3_t val)
23414 __builtin_aarch64_simd_ci __o;
23415 int16x8x3_t temp;
23416 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23417 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23418 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23419 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23420 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23421 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23422 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23425 __extension__ static __inline void __attribute__ ((__always_inline__))
23426 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
23428 __builtin_aarch64_simd_ci __o;
23429 poly16x8x3_t temp;
23430 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23431 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23432 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23433 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23434 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23435 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23436 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23439 __extension__ static __inline void __attribute__ ((__always_inline__))
23440 vst3_s32 (int32_t * __a, int32x2x3_t val)
23442 __builtin_aarch64_simd_ci __o;
23443 int32x4x3_t temp;
23444 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23445 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23446 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23447 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23448 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23449 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23450 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23453 __extension__ static __inline void __attribute__ ((__always_inline__))
23454 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
23456 __builtin_aarch64_simd_ci __o;
23457 uint8x16x3_t temp;
23458 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23459 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23460 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23461 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23462 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23463 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23464 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23467 __extension__ static __inline void __attribute__ ((__always_inline__))
23468 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
23470 __builtin_aarch64_simd_ci __o;
23471 uint16x8x3_t temp;
23472 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23473 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23474 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23475 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23476 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23477 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23478 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23481 __extension__ static __inline void __attribute__ ((__always_inline__))
23482 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
23484 __builtin_aarch64_simd_ci __o;
23485 uint32x4x3_t temp;
23486 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23487 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23488 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23489 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23490 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23491 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23492 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23495 __extension__ static __inline void __attribute__ ((__always_inline__))
23496 vst3_f16 (float16_t * __a, float16x4x3_t val)
23498 __builtin_aarch64_simd_ci __o;
23499 float16x8x3_t temp;
23500 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23501 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23502 temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
23503 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
23504 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
23505 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
23506 __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
23509 __extension__ static __inline void __attribute__ ((__always_inline__))
23510 vst3_f32 (float32_t * __a, float32x2x3_t val)
23512 __builtin_aarch64_simd_ci __o;
23513 float32x4x3_t temp;
23514 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23515 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23516 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23517 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23518 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23519 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23520 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23523 __extension__ static __inline void __attribute__ ((__always_inline__))
23524 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23526 __builtin_aarch64_simd_ci __o;
23527 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23528 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23529 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23530 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23533 __extension__ static __inline void __attribute__ ((__always_inline__))
23534 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23536 __builtin_aarch64_simd_ci __o;
23537 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23538 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23539 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23540 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23543 __extension__ static __inline void __attribute__ ((__always_inline__))
23544 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23546 __builtin_aarch64_simd_ci __o;
23547 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23548 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23549 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23550 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23553 __extension__ static __inline void __attribute__ ((__always_inline__))
23554 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23556 __builtin_aarch64_simd_ci __o;
23557 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23558 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23559 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23560 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23563 __extension__ static __inline void __attribute__ ((__always_inline__))
23564 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23566 __builtin_aarch64_simd_ci __o;
23567 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23568 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23569 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23570 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23573 __extension__ static __inline void __attribute__ ((__always_inline__))
23574 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23576 __builtin_aarch64_simd_ci __o;
23577 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23578 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23579 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23580 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23583 __extension__ static __inline void __attribute__ ((__always_inline__))
23584 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23586 __builtin_aarch64_simd_ci __o;
23587 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23588 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23589 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23590 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23593 __extension__ static __inline void __attribute__ ((__always_inline__))
23594 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23596 __builtin_aarch64_simd_ci __o;
23597 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23598 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23599 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23600 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23603 __extension__ static __inline void __attribute__ ((__always_inline__))
23604 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23606 __builtin_aarch64_simd_ci __o;
23607 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23608 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23609 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23610 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23613 __extension__ static __inline void __attribute__ ((__always_inline__))
23614 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23616 __builtin_aarch64_simd_ci __o;
23617 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23618 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23619 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23620 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23623 __extension__ static __inline void __attribute__ ((__always_inline__))
23624 vst3q_f16 (float16_t * __a, float16x8x3_t val)
23626 __builtin_aarch64_simd_ci __o;
23627 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
23628 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
23629 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
23630 __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
23633 __extension__ static __inline void __attribute__ ((__always_inline__))
23634 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23636 __builtin_aarch64_simd_ci __o;
23637 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23638 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23639 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23640 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23643 __extension__ static __inline void __attribute__ ((__always_inline__))
23644 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23646 __builtin_aarch64_simd_ci __o;
23647 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23648 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23649 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23650 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23653 __extension__ static __inline void
23654 vst4_s64 (int64_t * __a, int64x1x4_t val)
23656 __builtin_aarch64_simd_xi __o;
23657 int64x2x4_t temp;
23658 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23659 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23660 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23661 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23662 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23663 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23664 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23665 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23666 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23669 __extension__ static __inline void
23670 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23672 __builtin_aarch64_simd_xi __o;
23673 uint64x2x4_t temp;
23674 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23675 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23676 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23677 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23678 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23679 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23680 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23681 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23682 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23685 __extension__ static __inline void
23686 vst4_f64 (float64_t * __a, float64x1x4_t val)
23688 __builtin_aarch64_simd_xi __o;
23689 float64x2x4_t temp;
23690 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23691 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23692 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23693 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23694 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23695 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23696 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23697 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23698 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23701 __extension__ static __inline void
23702 vst4_s8 (int8_t * __a, int8x8x4_t val)
23704 __builtin_aarch64_simd_xi __o;
23705 int8x16x4_t temp;
23706 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23707 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23708 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23709 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23710 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23711 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23712 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23713 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23714 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23717 __extension__ static __inline void __attribute__ ((__always_inline__))
23718 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23720 __builtin_aarch64_simd_xi __o;
23721 poly8x16x4_t temp;
23722 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23723 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23724 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23725 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23726 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23727 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23728 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23729 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23730 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23733 __extension__ static __inline void __attribute__ ((__always_inline__))
23734 vst4_s16 (int16_t * __a, int16x4x4_t val)
23736 __builtin_aarch64_simd_xi __o;
23737 int16x8x4_t temp;
23738 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23739 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23740 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23741 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23742 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23743 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23744 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23745 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23746 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23749 __extension__ static __inline void __attribute__ ((__always_inline__))
23750 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23752 __builtin_aarch64_simd_xi __o;
23753 poly16x8x4_t temp;
23754 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23755 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23756 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23757 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23758 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23759 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23760 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23761 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23762 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23765 __extension__ static __inline void __attribute__ ((__always_inline__))
23766 vst4_s32 (int32_t * __a, int32x2x4_t val)
23768 __builtin_aarch64_simd_xi __o;
23769 int32x4x4_t temp;
23770 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23771 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23772 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23773 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23774 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23775 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23776 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23777 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23778 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23781 __extension__ static __inline void __attribute__ ((__always_inline__))
23782 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23784 __builtin_aarch64_simd_xi __o;
23785 uint8x16x4_t temp;
23786 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23787 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23788 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23789 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23790 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23791 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23792 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23793 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23794 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23797 __extension__ static __inline void __attribute__ ((__always_inline__))
23798 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23800 __builtin_aarch64_simd_xi __o;
23801 uint16x8x4_t temp;
23802 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23803 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23804 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23805 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23806 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23807 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23808 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23809 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23810 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23813 __extension__ static __inline void __attribute__ ((__always_inline__))
23814 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23816 __builtin_aarch64_simd_xi __o;
23817 uint32x4x4_t temp;
23818 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23819 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23820 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23821 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23822 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23823 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23824 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23825 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23826 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23829 __extension__ static __inline void __attribute__ ((__always_inline__))
23830 vst4_f16 (float16_t * __a, float16x4x4_t val)
23832 __builtin_aarch64_simd_xi __o;
23833 float16x8x4_t temp;
23834 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23835 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23836 temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
23837 temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
23838 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
23839 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
23840 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
23841 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
23842 __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
23845 __extension__ static __inline void __attribute__ ((__always_inline__))
23846 vst4_f32 (float32_t * __a, float32x2x4_t val)
23848 __builtin_aarch64_simd_xi __o;
23849 float32x4x4_t temp;
23850 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23851 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23852 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23853 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
23854 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
23855 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
23856 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
23857 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
23858 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23861 __extension__ static __inline void __attribute__ ((__always_inline__))
23862 vst4q_s8 (int8_t * __a, int8x16x4_t val)
23864 __builtin_aarch64_simd_xi __o;
23865 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23866 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23867 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23868 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23869 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23872 __extension__ static __inline void __attribute__ ((__always_inline__))
23873 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
23875 __builtin_aarch64_simd_xi __o;
23876 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23877 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23878 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23879 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23880 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23883 __extension__ static __inline void __attribute__ ((__always_inline__))
23884 vst4q_s16 (int16_t * __a, int16x8x4_t val)
23886 __builtin_aarch64_simd_xi __o;
23887 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23888 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23889 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23890 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23891 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23894 __extension__ static __inline void __attribute__ ((__always_inline__))
23895 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
23897 __builtin_aarch64_simd_xi __o;
23898 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23899 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23900 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23901 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23902 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23905 __extension__ static __inline void __attribute__ ((__always_inline__))
23906 vst4q_s32 (int32_t * __a, int32x4x4_t val)
23908 __builtin_aarch64_simd_xi __o;
23909 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23910 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23911 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23912 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23913 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23916 __extension__ static __inline void __attribute__ ((__always_inline__))
23917 vst4q_s64 (int64_t * __a, int64x2x4_t val)
23919 __builtin_aarch64_simd_xi __o;
23920 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23921 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23922 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23923 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23924 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23927 __extension__ static __inline void __attribute__ ((__always_inline__))
23928 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
23930 __builtin_aarch64_simd_xi __o;
23931 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23932 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23933 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23934 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23935 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23938 __extension__ static __inline void __attribute__ ((__always_inline__))
23939 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
23941 __builtin_aarch64_simd_xi __o;
23942 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23943 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23944 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23945 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23946 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23949 __extension__ static __inline void __attribute__ ((__always_inline__))
23950 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
23952 __builtin_aarch64_simd_xi __o;
23953 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23954 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23955 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23956 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23957 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
23960 __extension__ static __inline void __attribute__ ((__always_inline__))
23961 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
23963 __builtin_aarch64_simd_xi __o;
23964 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23965 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23966 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23967 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23968 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
23971 __extension__ static __inline void __attribute__ ((__always_inline__))
23972 vst4q_f16 (float16_t * __a, float16x8x4_t val)
23974 __builtin_aarch64_simd_xi __o;
23975 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
23976 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
23977 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
23978 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
23979 __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
23982 __extension__ static __inline void __attribute__ ((__always_inline__))
23983 vst4q_f32 (float32_t * __a, float32x4x4_t val)
23985 __builtin_aarch64_simd_xi __o;
23986 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
23987 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
23988 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
23989 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
23990 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23993 __extension__ static __inline void __attribute__ ((__always_inline__))
23994 vst4q_f64 (float64_t * __a, float64x2x4_t val)
23996 __builtin_aarch64_simd_xi __o;
23997 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
23998 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
23999 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24000 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24001 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24004 /* vsub */
24006 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
24007 vsubd_s64 (int64_t __a, int64_t __b)
24009 return __a - __b;
24012 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24013 vsubd_u64 (uint64_t __a, uint64_t __b)
24015 return __a - __b;
24018 /* vtbx1 */
24020 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24021 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
24023 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24024 vmov_n_u8 (8));
24025 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
24027 return vbsl_s8 (__mask, __tbl, __r);
24030 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24031 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
24033 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24034 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
24036 return vbsl_u8 (__mask, __tbl, __r);
24039 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24040 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
24042 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24043 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
24045 return vbsl_p8 (__mask, __tbl, __r);
24048 /* vtbx3 */
24050 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24051 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
24053 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24054 vmov_n_u8 (24));
24055 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
24057 return vbsl_s8 (__mask, __tbl, __r);
24060 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24061 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
24063 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24064 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
24066 return vbsl_u8 (__mask, __tbl, __r);
24069 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24070 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
24072 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24073 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
24075 return vbsl_p8 (__mask, __tbl, __r);
24078 /* vtbx4 */
24080 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24081 vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
24083 int8x8_t result;
24084 int8x16x2_t temp;
24085 __builtin_aarch64_simd_oi __o;
24086 temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
24087 temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
24088 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24089 (int8x16_t) temp.val[0], 0);
24090 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24091 (int8x16_t) temp.val[1], 1);
24092 result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
24093 return result;
24096 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24097 vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
24099 uint8x8_t result;
24100 uint8x16x2_t temp;
24101 __builtin_aarch64_simd_oi __o;
24102 temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
24103 temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
24104 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24105 (int8x16_t) temp.val[0], 0);
24106 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24107 (int8x16_t) temp.val[1], 1);
24108 result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
24109 (int8x8_t)__idx);
24110 return result;
24113 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24114 vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
24116 poly8x8_t result;
24117 poly8x16x2_t temp;
24118 __builtin_aarch64_simd_oi __o;
24119 temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
24120 temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
24121 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24122 (int8x16_t) temp.val[0], 0);
24123 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24124 (int8x16_t) temp.val[1], 1);
24125 result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
24126 (int8x8_t)__idx);
24127 return result;
24130 /* vtrn */
24132 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24133 vtrn1_f32 (float32x2_t __a, float32x2_t __b)
24135 #ifdef __AARCH64EB__
24136 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24137 #else
24138 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24139 #endif
24142 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24143 vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
24145 #ifdef __AARCH64EB__
24146 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24147 #else
24148 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24149 #endif
24152 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24153 vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
24155 #ifdef __AARCH64EB__
24156 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24157 #else
24158 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24159 #endif
24162 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24163 vtrn1_s8 (int8x8_t __a, int8x8_t __b)
24165 #ifdef __AARCH64EB__
24166 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24167 #else
24168 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24169 #endif
24172 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24173 vtrn1_s16 (int16x4_t __a, int16x4_t __b)
24175 #ifdef __AARCH64EB__
24176 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24177 #else
24178 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24179 #endif
24182 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24183 vtrn1_s32 (int32x2_t __a, int32x2_t __b)
24185 #ifdef __AARCH64EB__
24186 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24187 #else
24188 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24189 #endif
24192 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24193 vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
24195 #ifdef __AARCH64EB__
24196 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24197 #else
24198 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24199 #endif
24202 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24203 vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
24205 #ifdef __AARCH64EB__
24206 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24207 #else
24208 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24209 #endif
24212 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24213 vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
24215 #ifdef __AARCH64EB__
24216 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24217 #else
24218 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24219 #endif
24222 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24223 vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
24225 #ifdef __AARCH64EB__
24226 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24227 #else
24228 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24229 #endif
24232 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24233 vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
24235 #ifdef __AARCH64EB__
24236 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24237 #else
24238 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24239 #endif
24242 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24243 vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
24245 #ifdef __AARCH64EB__
24246 return __builtin_shuffle (__a, __b,
24247 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24248 #else
24249 return __builtin_shuffle (__a, __b,
24250 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24251 #endif
24254 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24255 vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
24257 #ifdef __AARCH64EB__
24258 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24259 #else
24260 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24261 #endif
24264 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24265 vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
24267 #ifdef __AARCH64EB__
24268 return __builtin_shuffle (__a, __b,
24269 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24270 #else
24271 return __builtin_shuffle (__a, __b,
24272 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24273 #endif
24276 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24277 vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
24279 #ifdef __AARCH64EB__
24280 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24281 #else
24282 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24283 #endif
24286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24287 vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
24289 #ifdef __AARCH64EB__
24290 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24291 #else
24292 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24293 #endif
24296 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24297 vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
24299 #ifdef __AARCH64EB__
24300 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24301 #else
24302 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24303 #endif
24306 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24307 vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
24309 #ifdef __AARCH64EB__
24310 return __builtin_shuffle (__a, __b,
24311 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24312 #else
24313 return __builtin_shuffle (__a, __b,
24314 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24315 #endif
24318 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24319 vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
24321 #ifdef __AARCH64EB__
24322 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24323 #else
24324 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24325 #endif
24328 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24329 vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
24331 #ifdef __AARCH64EB__
24332 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24333 #else
24334 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24335 #endif
24338 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24339 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
24341 #ifdef __AARCH64EB__
24342 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24343 #else
24344 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24345 #endif
24348 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24349 vtrn2_f32 (float32x2_t __a, float32x2_t __b)
24351 #ifdef __AARCH64EB__
24352 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24353 #else
24354 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24355 #endif
24358 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24359 vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
24361 #ifdef __AARCH64EB__
24362 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24363 #else
24364 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24365 #endif
24368 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24369 vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
24371 #ifdef __AARCH64EB__
24372 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24373 #else
24374 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24375 #endif
24378 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24379 vtrn2_s8 (int8x8_t __a, int8x8_t __b)
24381 #ifdef __AARCH64EB__
24382 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24383 #else
24384 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24385 #endif
24388 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24389 vtrn2_s16 (int16x4_t __a, int16x4_t __b)
24391 #ifdef __AARCH64EB__
24392 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24393 #else
24394 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24395 #endif
24398 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24399 vtrn2_s32 (int32x2_t __a, int32x2_t __b)
24401 #ifdef __AARCH64EB__
24402 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24403 #else
24404 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24405 #endif
24408 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24409 vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
24411 #ifdef __AARCH64EB__
24412 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24413 #else
24414 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24415 #endif
24418 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24419 vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
24421 #ifdef __AARCH64EB__
24422 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24423 #else
24424 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24425 #endif
24428 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24429 vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
24431 #ifdef __AARCH64EB__
24432 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24433 #else
24434 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24435 #endif
24438 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24439 vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
24441 #ifdef __AARCH64EB__
24442 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24443 #else
24444 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24445 #endif
24448 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24449 vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
24451 #ifdef __AARCH64EB__
24452 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24453 #else
24454 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24455 #endif
24458 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24459 vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
24461 #ifdef __AARCH64EB__
24462 return __builtin_shuffle (__a, __b,
24463 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24464 #else
24465 return __builtin_shuffle (__a, __b,
24466 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24467 #endif
24470 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24471 vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
24473 #ifdef __AARCH64EB__
24474 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24475 #else
24476 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24477 #endif
24480 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24481 vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
24483 #ifdef __AARCH64EB__
24484 return __builtin_shuffle (__a, __b,
24485 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24486 #else
24487 return __builtin_shuffle (__a, __b,
24488 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24489 #endif
24492 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24493 vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
24495 #ifdef __AARCH64EB__
24496 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24497 #else
24498 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24499 #endif
24502 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24503 vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
24505 #ifdef __AARCH64EB__
24506 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24507 #else
24508 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24509 #endif
24512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24513 vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
24515 #ifdef __AARCH64EB__
24516 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24517 #else
24518 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24519 #endif
24522 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24523 vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
24525 #ifdef __AARCH64EB__
24526 return __builtin_shuffle (__a, __b,
24527 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24528 #else
24529 return __builtin_shuffle (__a, __b,
24530 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24531 #endif
24534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24535 vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
24537 #ifdef __AARCH64EB__
24538 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24539 #else
24540 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24541 #endif
24544 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24545 vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
24547 #ifdef __AARCH64EB__
24548 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24549 #else
24550 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24551 #endif
24554 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24555 vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
24557 #ifdef __AARCH64EB__
24558 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24559 #else
24560 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24561 #endif
24564 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24565 vtrn_f32 (float32x2_t a, float32x2_t b)
24567 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24570 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24571 vtrn_p8 (poly8x8_t a, poly8x8_t b)
24573 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
24576 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24577 vtrn_p16 (poly16x4_t a, poly16x4_t b)
24579 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
24582 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24583 vtrn_s8 (int8x8_t a, int8x8_t b)
24585 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
24588 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
24589 vtrn_s16 (int16x4_t a, int16x4_t b)
24591 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
24594 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24595 vtrn_s32 (int32x2_t a, int32x2_t b)
24597 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
24600 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24601 vtrn_u8 (uint8x8_t a, uint8x8_t b)
24603 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
24606 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24607 vtrn_u16 (uint16x4_t a, uint16x4_t b)
24609 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
24612 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24613 vtrn_u32 (uint32x2_t a, uint32x2_t b)
24615 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
24618 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24619 vtrnq_f32 (float32x4_t a, float32x4_t b)
24621 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
24624 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24625 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
24627 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
24630 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24631 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
24633 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
24636 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24637 vtrnq_s8 (int8x16_t a, int8x16_t b)
24639 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
24642 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24643 vtrnq_s16 (int16x8_t a, int16x8_t b)
24645 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
24648 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
24649 vtrnq_s32 (int32x4_t a, int32x4_t b)
24651 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
24654 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
24655 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
24657 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
24660 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
24661 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
24663 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
24666 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
24667 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
24669 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
24672 /* vtst */
/* vtst family: per-lane bitwise test.  With GCC's vector extensions,
   (__a & __b) != 0 evaluates lane-wise, yielding all-ones in a lane where
   the AND is nonzero and all-zeros otherwise.  Signed variants cast the
   boolean-vector result to the corresponding unsigned vector type; the
   64-bit lane variants spell the zero constant via the __AARCH64_*64_C
   macros so it has the right width.  */
24674 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24675 vtst_s8 (int8x8_t __a, int8x8_t __b)
24677 return (uint8x8_t) ((__a & __b) != 0);
24680 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24681 vtst_s16 (int16x4_t __a, int16x4_t __b)
24683 return (uint16x4_t) ((__a & __b) != 0);
24686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24687 vtst_s32 (int32x2_t __a, int32x2_t __b)
24689 return (uint32x2_t) ((__a & __b) != 0);
24692 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24693 vtst_s64 (int64x1_t __a, int64x1_t __b)
24695 return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0));
24698 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24699 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
24701 return ((__a & __b) != 0);
24704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24705 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
24707 return ((__a & __b) != 0);
24710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24711 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
24713 return ((__a & __b) != 0);
24716 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24717 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
24719 return ((__a & __b) != __AARCH64_UINT64_C (0));
/* 128-bit (q-form) variants: same lane-wise test on full-width vectors.  */
24722 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24723 vtstq_s8 (int8x16_t __a, int8x16_t __b)
24725 return (uint8x16_t) ((__a & __b) != 0);
24728 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24729 vtstq_s16 (int16x8_t __a, int16x8_t __b)
24731 return (uint16x8_t) ((__a & __b) != 0);
24734 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24735 vtstq_s32 (int32x4_t __a, int32x4_t __b)
24737 return (uint32x4_t) ((__a & __b) != 0);
24740 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24741 vtstq_s64 (int64x2_t __a, int64x2_t __b)
24743 return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
24746 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24747 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
24749 return ((__a & __b) != 0);
24752 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24753 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
24755 return ((__a & __b) != 0);
24758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24759 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
24761 return ((__a & __b) != 0);
24764 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24765 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
24767 return ((__a & __b) != __AARCH64_UINT64_C (0));
/* Scalar (d-form) variants: return all-ones (-1 as uint64_t) when the
   64-bit AND of the operands is nonzero, zero otherwise.  */
24770 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24771 vtstd_s64 (int64_t __a, int64_t __b)
24773 return (__a & __b) ? -1ll : 0ll;
24776 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24777 vtstd_u64 (uint64_t __a, uint64_t __b)
24779 return (__a & __b) ? -1ll : 0ll;
24782 /* vuqadd */
/* vuqadd family: saturating addition of an unsigned operand __b to a
   signed accumulator __a, lowered to the __builtin_aarch64_suqadd*
   builtins.  The _ssu suffix on the builtins reflects the signed result,
   signed first operand, unsigned second operand.  The 64x1 vector form
   unpacks to the scalar DI builtin and rewraps the result.  */
24784 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24785 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
24787 return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
24790 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24791 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
24793 return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
24796 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24797 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
24799 return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
24802 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24803 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
24805 return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
/* 128-bit (q-form) variants.  */
24808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24809 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
24811 return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
24814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24815 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
24817 return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
24820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24821 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
24823 return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
24826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24827 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
24829 return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
/* Scalar (b/h/s/d-form) variants operating on single elements.  */
24832 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
24833 vuqaddb_s8 (int8_t __a, uint8_t __b)
24835 return __builtin_aarch64_suqaddqi_ssu (__a, __b);
24838 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
24839 vuqaddh_s16 (int16_t __a, uint16_t __b)
24841 return __builtin_aarch64_suqaddhi_ssu (__a, __b);
24844 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
24845 vuqadds_s32 (int32_t __a, uint32_t __b)
24847 return __builtin_aarch64_suqaddsi_ssu (__a, __b);
24850 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
24851 vuqaddd_s64 (int64_t __a, uint64_t __b)
24853 return __builtin_aarch64_suqadddi_ssu (__a, __b);
/* __DEFINTERLEAVE (op, rettype, intype, funcsuffix, Q) expands to the
   two-result interleave intrinsic v<op><Q>_<funcsuffix>, which packs the
   results of the corresponding v<op>1<Q>_ and v<op>2<Q>_ intrinsics into
   an x2 structure.  Q is either empty (64-bit forms) or `q' (128-bit).  */
24856 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
24857 __extension__ static __inline rettype \
24858 __attribute__ ((__always_inline__)) \
24859 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
24861 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
24862 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
/* __INTERLEAVE_LIST (op) instantiates __DEFINTERLEAVE for every
   element-type/width combination: the 64-bit forms (empty Q argument)
   followed by the 128-bit `q' forms.  Used below to generate the
   vuzp_* family (and, past this chunk, the vzip_*/vtrn_* families).  */
24865 #define __INTERLEAVE_LIST(op) \
24866 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
24867 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
24868 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
24869 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
24870 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
24871 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
24872 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
24873 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
24874 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
24875 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
24876 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
24877 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
24878 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
24879 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
24880 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
24881 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
24882 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
24883 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
24885 /* vuzp */
/* vuzp1 family: unzip, keeping the even-numbered lanes of the
   concatenation of __a and __b.  Implemented with __builtin_shuffle;
   the little-endian masks select architectural lanes 0, 2, 4, ...
   from the pair, while the __AARCH64EB__ masks are the equivalents
   adjusted for GCC's reversed in-register lane numbering on
   big-endian AArch64.  */
24887 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24888 vuzp1_f32 (float32x2_t __a, float32x2_t __b)
24890 #ifdef __AARCH64EB__
24891 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24892 #else
24893 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24894 #endif
24897 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24898 vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
24900 #ifdef __AARCH64EB__
24901 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24902 #else
24903 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24904 #endif
24907 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24908 vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
24910 #ifdef __AARCH64EB__
24911 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24912 #else
24913 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24914 #endif
24917 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24918 vuzp1_s8 (int8x8_t __a, int8x8_t __b)
24920 #ifdef __AARCH64EB__
24921 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24922 #else
24923 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24924 #endif
24927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24928 vuzp1_s16 (int16x4_t __a, int16x4_t __b)
24930 #ifdef __AARCH64EB__
24931 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24932 #else
24933 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24934 #endif
24937 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24938 vuzp1_s32 (int32x2_t __a, int32x2_t __b)
24940 #ifdef __AARCH64EB__
24941 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24942 #else
24943 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24944 #endif
24947 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24948 vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
24950 #ifdef __AARCH64EB__
24951 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
24952 #else
24953 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
24954 #endif
24957 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24958 vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
24960 #ifdef __AARCH64EB__
24961 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
24962 #else
24963 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
24964 #endif
24967 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24968 vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
24970 #ifdef __AARCH64EB__
24971 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24972 #else
24973 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24974 #endif
/* 128-bit (q-form) variants.  */
24977 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24978 vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
24980 #ifdef __AARCH64EB__
24981 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
24982 #else
24983 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
24984 #endif
24987 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24988 vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
24990 #ifdef __AARCH64EB__
24991 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24992 #else
24993 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24994 #endif
24997 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24998 vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
25000 #ifdef __AARCH64EB__
25001 return __builtin_shuffle (__a, __b, (uint8x16_t)
25002 {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25003 #else
25004 return __builtin_shuffle (__a, __b, (uint8x16_t)
25005 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25006 #endif
25009 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25010 vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
25012 #ifdef __AARCH64EB__
25013 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25014 #else
25015 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25016 #endif
25019 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25020 vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
25022 #ifdef __AARCH64EB__
25023 return __builtin_shuffle (__a, __b,
25024 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25025 #else
25026 return __builtin_shuffle (__a, __b,
25027 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25028 #endif
25031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25032 vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
25034 #ifdef __AARCH64EB__
25035 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25036 #else
25037 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25038 #endif
25041 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25042 vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
25044 #ifdef __AARCH64EB__
25045 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25046 #else
25047 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25048 #endif
25051 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25052 vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
25054 #ifdef __AARCH64EB__
25055 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25056 #else
25057 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25058 #endif
25061 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25062 vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
25064 #ifdef __AARCH64EB__
25065 return __builtin_shuffle (__a, __b,
25066 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25067 #else
25068 return __builtin_shuffle (__a, __b,
25069 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25070 #endif
25073 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25074 vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
25076 #ifdef __AARCH64EB__
25077 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25078 #else
25079 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25080 #endif
25083 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25084 vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
25086 #ifdef __AARCH64EB__
25087 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25088 #else
25089 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25090 #endif
25093 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25094 vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
25096 #ifdef __AARCH64EB__
25097 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25098 #else
25099 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25100 #endif
/* vuzp2 family: unzip, keeping the odd-numbered lanes of the
   concatenation of __a and __b.  Little-endian masks select lanes
   1, 3, 5, ...; the __AARCH64EB__ masks are the big-endian
   lane-numbering equivalents.  The trailing __INTERLEAVE_LIST (uzp)
   invocation then generates the two-result vuzp_*/vuzpq_* wrappers
   from these vuzp1/vuzp2 primitives.  */
25103 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25104 vuzp2_f32 (float32x2_t __a, float32x2_t __b)
25106 #ifdef __AARCH64EB__
25107 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25108 #else
25109 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25110 #endif
25113 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25114 vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
25116 #ifdef __AARCH64EB__
25117 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25118 #else
25119 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25120 #endif
25123 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25124 vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
25126 #ifdef __AARCH64EB__
25127 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25128 #else
25129 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25130 #endif
25133 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25134 vuzp2_s8 (int8x8_t __a, int8x8_t __b)
25136 #ifdef __AARCH64EB__
25137 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25138 #else
25139 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25140 #endif
25143 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25144 vuzp2_s16 (int16x4_t __a, int16x4_t __b)
25146 #ifdef __AARCH64EB__
25147 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25148 #else
25149 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25150 #endif
25153 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25154 vuzp2_s32 (int32x2_t __a, int32x2_t __b)
25156 #ifdef __AARCH64EB__
25157 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25158 #else
25159 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25160 #endif
25163 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25164 vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
25166 #ifdef __AARCH64EB__
25167 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25168 #else
25169 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25170 #endif
25173 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25174 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
25176 #ifdef __AARCH64EB__
25177 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25178 #else
25179 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25180 #endif
25183 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25184 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
25186 #ifdef __AARCH64EB__
25187 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25188 #else
25189 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25190 #endif
/* 128-bit (q-form) variants.  */
25193 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25194 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
25196 #ifdef __AARCH64EB__
25197 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25198 #else
25199 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25200 #endif
25203 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25204 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
25206 #ifdef __AARCH64EB__
25207 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25208 #else
25209 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25210 #endif
25213 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25214 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
25216 #ifdef __AARCH64EB__
25217 return __builtin_shuffle (__a, __b,
25218 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25219 #else
25220 return __builtin_shuffle (__a, __b,
25221 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25222 #endif
25225 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25226 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
25228 #ifdef __AARCH64EB__
25229 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25230 #else
25231 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25232 #endif
25235 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25236 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
25238 #ifdef __AARCH64EB__
25239 return __builtin_shuffle (__a, __b,
25240 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25241 #else
25242 return __builtin_shuffle (__a, __b,
25243 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25244 #endif
25247 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25248 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
25250 #ifdef __AARCH64EB__
25251 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25252 #else
25253 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25254 #endif
25257 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25258 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
25260 #ifdef __AARCH64EB__
25261 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25262 #else
25263 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25264 #endif
25267 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25268 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
25270 #ifdef __AARCH64EB__
25271 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25272 #else
25273 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25274 #endif
25277 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25278 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
25280 #ifdef __AARCH64EB__
25281 return __builtin_shuffle (__a, __b, (uint8x16_t)
25282 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25283 #else
25284 return __builtin_shuffle (__a, __b, (uint8x16_t)
25285 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25286 #endif
25289 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25290 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
25292 #ifdef __AARCH64EB__
25293 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25294 #else
25295 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25296 #endif
25299 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25300 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
25302 #ifdef __AARCH64EB__
25303 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25304 #else
25305 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25306 #endif
25309 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25310 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
25312 #ifdef __AARCH64EB__
25313 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25314 #else
25315 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25316 #endif
/* Generate the paired vuzp_*/vuzpq_* intrinsics from vuzp1/vuzp2.  */
25319 __INTERLEAVE_LIST (uzp)
25321 /* vzip */
/* vzip1 family: interleave the low halves of __a and __b.  Little-endian
   masks alternate lanes from the bottom half of each operand
   (0, n, 1, n+1, ...); the __AARCH64EB__ masks are the big-endian
   lane-numbering equivalents.  */
25323 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25324 vzip1_f32 (float32x2_t __a, float32x2_t __b)
25326 #ifdef __AARCH64EB__
25327 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25328 #else
25329 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25330 #endif
25333 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25334 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
25336 #ifdef __AARCH64EB__
25337 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25338 #else
25339 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25340 #endif
25343 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25344 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
25346 #ifdef __AARCH64EB__
25347 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25348 #else
25349 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25350 #endif
25353 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25354 vzip1_s8 (int8x8_t __a, int8x8_t __b)
25356 #ifdef __AARCH64EB__
25357 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25358 #else
25359 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25360 #endif
25363 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25364 vzip1_s16 (int16x4_t __a, int16x4_t __b)
25366 #ifdef __AARCH64EB__
25367 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25368 #else
25369 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25370 #endif
25373 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25374 vzip1_s32 (int32x2_t __a, int32x2_t __b)
25376 #ifdef __AARCH64EB__
25377 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25378 #else
25379 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25380 #endif
25383 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25384 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
25386 #ifdef __AARCH64EB__
25387 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25388 #else
25389 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25390 #endif
25393 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25394 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
25396 #ifdef __AARCH64EB__
25397 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25398 #else
25399 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25400 #endif
25403 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25404 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
25406 #ifdef __AARCH64EB__
25407 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25408 #else
25409 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25410 #endif
/* 128-bit (q-form) variants.  */
25413 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25414 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
25416 #ifdef __AARCH64EB__
25417 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25418 #else
25419 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25420 #endif
25423 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25424 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
25426 #ifdef __AARCH64EB__
25427 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25428 #else
25429 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25430 #endif
25433 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25434 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
25436 #ifdef __AARCH64EB__
25437 return __builtin_shuffle (__a, __b, (uint8x16_t)
25438 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25439 #else
25440 return __builtin_shuffle (__a, __b, (uint8x16_t)
25441 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25442 #endif
25445 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25446 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
25448 #ifdef __AARCH64EB__
25449 return __builtin_shuffle (__a, __b, (uint16x8_t)
25450 {12, 4, 13, 5, 14, 6, 15, 7});
25451 #else
25452 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25453 #endif
25456 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25457 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
25459 #ifdef __AARCH64EB__
25460 return __builtin_shuffle (__a, __b, (uint8x16_t)
25461 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25462 #else
25463 return __builtin_shuffle (__a, __b, (uint8x16_t)
25464 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25465 #endif
25468 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25469 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
25471 #ifdef __AARCH64EB__
25472 return __builtin_shuffle (__a, __b, (uint16x8_t)
25473 {12, 4, 13, 5, 14, 6, 15, 7});
25474 #else
25475 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25476 #endif
25479 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25480 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
25482 #ifdef __AARCH64EB__
25483 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25484 #else
25485 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25486 #endif
25489 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25490 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
25492 #ifdef __AARCH64EB__
25493 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25494 #else
25495 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25496 #endif
25499 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25500 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
25502 #ifdef __AARCH64EB__
25503 return __builtin_shuffle (__a, __b, (uint8x16_t)
25504 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25505 #else
25506 return __builtin_shuffle (__a, __b, (uint8x16_t)
25507 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25508 #endif
25511 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25512 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
25514 #ifdef __AARCH64EB__
25515 return __builtin_shuffle (__a, __b, (uint16x8_t)
25516 {12, 4, 13, 5, 14, 6, 15, 7})
25517 #else
25518 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25519 #endif
25522 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25523 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
25525 #ifdef __AARCH64EB__
25526 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25527 #else
25528 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25529 #endif
25532 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25533 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25535 #ifdef __AARCH64EB__
25536 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25537 #else
25538 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25539 #endif
/* vzip2 family: interleave the high halves of __a and __b.  Little-endian
   masks alternate lanes from the top half of each operand
   (n/2, n+n/2, n/2+1, ...); the __AARCH64EB__ masks are the big-endian
   lane-numbering equivalents.  */
25542 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25543 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25545 #ifdef __AARCH64EB__
25546 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25547 #else
25548 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25549 #endif
25552 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25553 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25555 #ifdef __AARCH64EB__
25556 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25557 #else
25558 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25559 #endif
25562 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25563 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25565 #ifdef __AARCH64EB__
25566 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25567 #else
25568 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25569 #endif
25572 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25573 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25575 #ifdef __AARCH64EB__
25576 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25577 #else
25578 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25579 #endif
25582 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25583 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25585 #ifdef __AARCH64EB__
25586 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25587 #else
25588 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25589 #endif
25592 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25593 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25595 #ifdef __AARCH64EB__
25596 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25597 #else
25598 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25599 #endif
25602 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25603 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25605 #ifdef __AARCH64EB__
25606 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25607 #else
25608 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25609 #endif
25612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25613 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25615 #ifdef __AARCH64EB__
25616 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25617 #else
25618 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25619 #endif
25622 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25623 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25625 #ifdef __AARCH64EB__
25626 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25627 #else
25628 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25629 #endif
/* 128-bit (q-form) variants.  */
25632 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25633 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25635 #ifdef __AARCH64EB__
25636 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25637 #else
25638 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25639 #endif
25642 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25643 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25645 #ifdef __AARCH64EB__
25646 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25647 #else
25648 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25649 #endif
25652 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25653 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25655 #ifdef __AARCH64EB__
25656 return __builtin_shuffle (__a, __b, (uint8x16_t)
25657 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25658 #else
25659 return __builtin_shuffle (__a, __b, (uint8x16_t)
25660 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25661 #endif
25664 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25665 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25667 #ifdef __AARCH64EB__
25668 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25669 #else
25670 return __builtin_shuffle (__a, __b, (uint16x8_t)
25671 {4, 12, 5, 13, 6, 14, 7, 15});
25672 #endif
25675 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25676 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25678 #ifdef __AARCH64EB__
25679 return __builtin_shuffle (__a, __b, (uint8x16_t)
25680 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25681 #else
25682 return __builtin_shuffle (__a, __b, (uint8x16_t)
25683 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25684 #endif
25687 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25688 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25690 #ifdef __AARCH64EB__
25691 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25692 #else
25693 return __builtin_shuffle (__a, __b, (uint16x8_t)
25694 {4, 12, 5, 13, 6, 14, 7, 15});
25695 #endif
25698 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25699 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25701 #ifdef __AARCH64EB__
25702 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25703 #else
25704 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25705 #endif
25708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25709 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25711 #ifdef __AARCH64EB__
25712 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25713 #else
25714 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25715 #endif
25718 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25719 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25721 #ifdef __AARCH64EB__
25722 return __builtin_shuffle (__a, __b, (uint8x16_t)
25723 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25724 #else
25725 return __builtin_shuffle (__a, __b, (uint8x16_t)
25726 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25727 #endif
25730 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25731 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25733 #ifdef __AARCH64EB__
25734 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25735 #else
25736 return __builtin_shuffle (__a, __b, (uint16x8_t)
25737 {4, 12, 5, 13, 6, 14, 7, 15});
25738 #endif
25741 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25742 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25744 #ifdef __AARCH64EB__
25745 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25746 #else
25747 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25748 #endif
25751 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25752 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25754 #ifdef __AARCH64EB__
25755 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25756 #else
25757 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25758 #endif
25761 __INTERLEAVE_LIST (zip)
25763 #undef __INTERLEAVE_LIST
25764 #undef __DEFINTERLEAVE
25766 /* End of optimal implementations in approved order. */
25768 #undef __aarch64_vget_lane_any
25770 #undef __aarch64_vdup_lane_any
25771 #undef __aarch64_vdup_lane_f32
25772 #undef __aarch64_vdup_lane_f64
25773 #undef __aarch64_vdup_lane_p8
25774 #undef __aarch64_vdup_lane_p16
25775 #undef __aarch64_vdup_lane_s8
25776 #undef __aarch64_vdup_lane_s16
25777 #undef __aarch64_vdup_lane_s32
25778 #undef __aarch64_vdup_lane_s64
25779 #undef __aarch64_vdup_lane_u8
25780 #undef __aarch64_vdup_lane_u16
25781 #undef __aarch64_vdup_lane_u32
25782 #undef __aarch64_vdup_lane_u64
25783 #undef __aarch64_vdup_laneq_f32
25784 #undef __aarch64_vdup_laneq_f64
25785 #undef __aarch64_vdup_laneq_p8
25786 #undef __aarch64_vdup_laneq_p16
25787 #undef __aarch64_vdup_laneq_s8
25788 #undef __aarch64_vdup_laneq_s16
25789 #undef __aarch64_vdup_laneq_s32
25790 #undef __aarch64_vdup_laneq_s64
25791 #undef __aarch64_vdup_laneq_u8
25792 #undef __aarch64_vdup_laneq_u16
25793 #undef __aarch64_vdup_laneq_u32
25794 #undef __aarch64_vdup_laneq_u64
25795 #undef __aarch64_vdupq_lane_f32
25796 #undef __aarch64_vdupq_lane_f64
25797 #undef __aarch64_vdupq_lane_p8
25798 #undef __aarch64_vdupq_lane_p16
25799 #undef __aarch64_vdupq_lane_s8
25800 #undef __aarch64_vdupq_lane_s16
25801 #undef __aarch64_vdupq_lane_s32
25802 #undef __aarch64_vdupq_lane_s64
25803 #undef __aarch64_vdupq_lane_u8
25804 #undef __aarch64_vdupq_lane_u16
25805 #undef __aarch64_vdupq_lane_u32
25806 #undef __aarch64_vdupq_lane_u64
25807 #undef __aarch64_vdupq_laneq_f32
25808 #undef __aarch64_vdupq_laneq_f64
25809 #undef __aarch64_vdupq_laneq_p8
25810 #undef __aarch64_vdupq_laneq_p16
25811 #undef __aarch64_vdupq_laneq_s8
25812 #undef __aarch64_vdupq_laneq_s16
25813 #undef __aarch64_vdupq_laneq_s32
25814 #undef __aarch64_vdupq_laneq_s64
25815 #undef __aarch64_vdupq_laneq_u8
25816 #undef __aarch64_vdupq_laneq_u16
25817 #undef __aarch64_vdupq_laneq_u32
25818 #undef __aarch64_vdupq_laneq_u64
25820 #pragma GCC pop_options
25822 #endif