gcc/
[official-gcc.git] / gcc / config / arm / arm_neon.h
blob3898ff7302dc3f21e6b50a8a7b835033c1ae2021
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2006-2016 Free Software Foundation, Inc.
4 Contributed by CodeSourcery.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _GCC_ARM_NEON_H
28 #define _GCC_ARM_NEON_H 1
30 #ifndef __ARM_FP
31 #error "NEON intrinsics not available with the soft-float ABI. Please use -mfloat-abi=softp or -mfloat-abi=hard"
32 #else
34 #pragma GCC push_options
35 #pragma GCC target ("fpu=neon")
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
41 #include <arm_fp16.h>
42 #include <stdint.h>
/* Public names for the 64-bit vector types.  Each typedef maps a
   compiler-internal __simd64_* (or __builtin_neon_*) type onto the
   <element><bits>x<lanes>_t name used by the intrinsics below.
   The float16 types are only declared when one of the __ARM_FP16_FORMAT_*
   macros is defined; poly64x1_t is declared inside a push/pop_options
   region that selects the crypto-neon-fp-armv8 FPU.  */
44 typedef __simd64_int8_t int8x8_t;
45 typedef __simd64_int16_t int16x4_t;
46 typedef __simd64_int32_t int32x2_t;
47 typedef __builtin_neon_di int64x1_t;
48 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
49 typedef __fp16 float16_t;
50 typedef __simd64_float16_t float16x4_t;
51 #endif
52 typedef __simd64_float32_t float32x2_t;
53 typedef __simd64_poly8_t poly8x8_t;
54 typedef __simd64_poly16_t poly16x4_t;
55 #pragma GCC push_options
56 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
57 typedef __builtin_neon_poly64 poly64x1_t;
58 #pragma GCC pop_options
59 typedef __simd64_uint8_t uint8x8_t;
60 typedef __simd64_uint16_t uint16x4_t;
61 typedef __simd64_uint32_t uint32x2_t;
62 typedef __builtin_neon_udi uint64x1_t;
/* Public names for the 128-bit vector types, mapped from the
   compiler-internal __simd128_* types.  float16x8_t is only declared when
   one of the __ARM_FP16_FORMAT_* macros is defined.  poly64x2_t is built
   from __builtin_neon_poly64 with an explicit 16-byte __vector_size__
   attribute, inside a push/pop_options region that selects the
   crypto-neon-fp-armv8 FPU.  */
64 typedef __simd128_int8_t int8x16_t;
65 typedef __simd128_int16_t int16x8_t;
66 typedef __simd128_int32_t int32x4_t;
67 typedef __simd128_int64_t int64x2_t;
68 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
69 typedef __simd128_float16_t float16x8_t;
70 #endif
71 typedef __simd128_float32_t float32x4_t;
72 typedef __simd128_poly8_t poly8x16_t;
73 typedef __simd128_poly16_t poly16x8_t;
74 #pragma GCC push_options
75 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
76 typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16)));
77 #pragma GCC pop_options
79 typedef __simd128_uint8_t uint8x16_t;
80 typedef __simd128_uint16_t uint16x8_t;
81 typedef __simd128_uint32_t uint32x4_t;
82 typedef __simd128_uint64_t uint64x2_t;
/* Scalar element types.  float32_t is plain `float'; the polynomial
   scalars are distinct compiler builtin types.  poly64_t and poly128_t are
   declared inside a push/pop_options region that selects the
   crypto-neon-fp-armv8 FPU.  */
84 typedef float float32_t;
86 /* The Poly types are user visible and live in their own world,
87 keep them that way. */
88 typedef __builtin_neon_poly8 poly8_t;
89 typedef __builtin_neon_poly16 poly16_t;
90 #pragma GCC push_options
91 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
92 typedef __builtin_neon_poly64 poly64_t;
93 typedef __builtin_neon_poly128 poly128_t;
94 #pragma GCC pop_options
/* Two-vector aggregates: one struct <vector>x2_t per vector type, each
   holding a single member `val' that is an array of two vectors of that
   type.  The float16 variants exist only when one of the
   __ARM_FP16_FORMAT_* macros is defined; the poly64 variants are declared
   with the crypto-neon-fp-armv8 FPU selected.
   NOTE(review): presumably these are the operand/result types of the
   multi-vector load/store intrinsics defined later in the file -- those
   are not visible in this part of the listing; confirm.  */
96 typedef struct int8x8x2_t
98 int8x8_t val[2];
99 } int8x8x2_t;
101 typedef struct int8x16x2_t
103 int8x16_t val[2];
104 } int8x16x2_t;
106 typedef struct int16x4x2_t
108 int16x4_t val[2];
109 } int16x4x2_t;
111 typedef struct int16x8x2_t
113 int16x8_t val[2];
114 } int16x8x2_t;
116 typedef struct int32x2x2_t
118 int32x2_t val[2];
119 } int32x2x2_t;
121 typedef struct int32x4x2_t
123 int32x4_t val[2];
124 } int32x4x2_t;
126 typedef struct int64x1x2_t
128 int64x1_t val[2];
129 } int64x1x2_t;
131 typedef struct int64x2x2_t
133 int64x2_t val[2];
134 } int64x2x2_t;
136 typedef struct uint8x8x2_t
138 uint8x8_t val[2];
139 } uint8x8x2_t;
141 typedef struct uint8x16x2_t
143 uint8x16_t val[2];
144 } uint8x16x2_t;
146 typedef struct uint16x4x2_t
148 uint16x4_t val[2];
149 } uint16x4x2_t;
151 typedef struct uint16x8x2_t
153 uint16x8_t val[2];
154 } uint16x8x2_t;
156 typedef struct uint32x2x2_t
158 uint32x2_t val[2];
159 } uint32x2x2_t;
161 typedef struct uint32x4x2_t
163 uint32x4_t val[2];
164 } uint32x4x2_t;
166 typedef struct uint64x1x2_t
168 uint64x1_t val[2];
169 } uint64x1x2_t;
171 typedef struct uint64x2x2_t
173 uint64x2_t val[2];
174 } uint64x2x2_t;
176 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
177 typedef struct float16x4x2_t
179 float16x4_t val[2];
180 } float16x4x2_t;
181 #endif
183 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
184 typedef struct float16x8x2_t
186 float16x8_t val[2];
187 } float16x8x2_t;
188 #endif
190 typedef struct float32x2x2_t
192 float32x2_t val[2];
193 } float32x2x2_t;
195 typedef struct float32x4x2_t
197 float32x4_t val[2];
198 } float32x4x2_t;
200 typedef struct poly8x8x2_t
202 poly8x8_t val[2];
203 } poly8x8x2_t;
205 typedef struct poly8x16x2_t
207 poly8x16_t val[2];
208 } poly8x16x2_t;
210 typedef struct poly16x4x2_t
212 poly16x4_t val[2];
213 } poly16x4x2_t;
215 typedef struct poly16x8x2_t
217 poly16x8_t val[2];
218 } poly16x8x2_t;
220 #pragma GCC push_options
221 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
222 typedef struct poly64x1x2_t
224 poly64x1_t val[2];
225 } poly64x1x2_t;
228 typedef struct poly64x2x2_t
230 poly64x2_t val[2];
231 } poly64x2x2_t;
232 #pragma GCC pop_options
/* Three-vector aggregates: one struct <vector>x3_t per vector type, each
   holding a single member `val' that is an array of three vectors of that
   type.  The float16 variants exist only when one of the
   __ARM_FP16_FORMAT_* macros is defined; the poly64 variants are declared
   with the crypto-neon-fp-armv8 FPU selected.  */
235 typedef struct int8x8x3_t
237 int8x8_t val[3];
238 } int8x8x3_t;
240 typedef struct int8x16x3_t
242 int8x16_t val[3];
243 } int8x16x3_t;
245 typedef struct int16x4x3_t
247 int16x4_t val[3];
248 } int16x4x3_t;
250 typedef struct int16x8x3_t
252 int16x8_t val[3];
253 } int16x8x3_t;
255 typedef struct int32x2x3_t
257 int32x2_t val[3];
258 } int32x2x3_t;
260 typedef struct int32x4x3_t
262 int32x4_t val[3];
263 } int32x4x3_t;
265 typedef struct int64x1x3_t
267 int64x1_t val[3];
268 } int64x1x3_t;
270 typedef struct int64x2x3_t
272 int64x2_t val[3];
273 } int64x2x3_t;
275 typedef struct uint8x8x3_t
277 uint8x8_t val[3];
278 } uint8x8x3_t;
280 typedef struct uint8x16x3_t
282 uint8x16_t val[3];
283 } uint8x16x3_t;
285 typedef struct uint16x4x3_t
287 uint16x4_t val[3];
288 } uint16x4x3_t;
290 typedef struct uint16x8x3_t
292 uint16x8_t val[3];
293 } uint16x8x3_t;
295 typedef struct uint32x2x3_t
297 uint32x2_t val[3];
298 } uint32x2x3_t;
300 typedef struct uint32x4x3_t
302 uint32x4_t val[3];
303 } uint32x4x3_t;
305 typedef struct uint64x1x3_t
307 uint64x1_t val[3];
308 } uint64x1x3_t;
310 typedef struct uint64x2x3_t
312 uint64x2_t val[3];
313 } uint64x2x3_t;
315 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
316 typedef struct float16x4x3_t
318 float16x4_t val[3];
319 } float16x4x3_t;
320 #endif
322 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
323 typedef struct float16x8x3_t
325 float16x8_t val[3];
326 } float16x8x3_t;
327 #endif
329 typedef struct float32x2x3_t
331 float32x2_t val[3];
332 } float32x2x3_t;
334 typedef struct float32x4x3_t
336 float32x4_t val[3];
337 } float32x4x3_t;
339 typedef struct poly8x8x3_t
341 poly8x8_t val[3];
342 } poly8x8x3_t;
344 typedef struct poly8x16x3_t
346 poly8x16_t val[3];
347 } poly8x16x3_t;
349 typedef struct poly16x4x3_t
351 poly16x4_t val[3];
352 } poly16x4x3_t;
354 typedef struct poly16x8x3_t
356 poly16x8_t val[3];
357 } poly16x8x3_t;
359 #pragma GCC push_options
360 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
361 typedef struct poly64x1x3_t
363 poly64x1_t val[3];
364 } poly64x1x3_t;
367 typedef struct poly64x2x3_t
369 poly64x2_t val[3];
370 } poly64x2x3_t;
371 #pragma GCC pop_options
/* Four-vector aggregates: one struct <vector>x4_t per vector type, each
   holding a single member `val' that is an array of four vectors of that
   type.  The float16 variants exist only when one of the
   __ARM_FP16_FORMAT_* macros is defined; the poly64 variants are declared
   with the crypto-neon-fp-armv8 FPU selected.  */
374 typedef struct int8x8x4_t
376 int8x8_t val[4];
377 } int8x8x4_t;
379 typedef struct int8x16x4_t
381 int8x16_t val[4];
382 } int8x16x4_t;
384 typedef struct int16x4x4_t
386 int16x4_t val[4];
387 } int16x4x4_t;
389 typedef struct int16x8x4_t
391 int16x8_t val[4];
392 } int16x8x4_t;
394 typedef struct int32x2x4_t
396 int32x2_t val[4];
397 } int32x2x4_t;
399 typedef struct int32x4x4_t
401 int32x4_t val[4];
402 } int32x4x4_t;
404 typedef struct int64x1x4_t
406 int64x1_t val[4];
407 } int64x1x4_t;
409 typedef struct int64x2x4_t
411 int64x2_t val[4];
412 } int64x2x4_t;
414 typedef struct uint8x8x4_t
416 uint8x8_t val[4];
417 } uint8x8x4_t;
419 typedef struct uint8x16x4_t
421 uint8x16_t val[4];
422 } uint8x16x4_t;
424 typedef struct uint16x4x4_t
426 uint16x4_t val[4];
427 } uint16x4x4_t;
429 typedef struct uint16x8x4_t
431 uint16x8_t val[4];
432 } uint16x8x4_t;
434 typedef struct uint32x2x4_t
436 uint32x2_t val[4];
437 } uint32x2x4_t;
439 typedef struct uint32x4x4_t
441 uint32x4_t val[4];
442 } uint32x4x4_t;
444 typedef struct uint64x1x4_t
446 uint64x1_t val[4];
447 } uint64x1x4_t;
449 typedef struct uint64x2x4_t
451 uint64x2_t val[4];
452 } uint64x2x4_t;
454 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
455 typedef struct float16x4x4_t
457 float16x4_t val[4];
458 } float16x4x4_t;
459 #endif
461 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
462 typedef struct float16x8x4_t
464 float16x8_t val[4];
465 } float16x8x4_t;
466 #endif
468 typedef struct float32x2x4_t
470 float32x2_t val[4];
471 } float32x2x4_t;
473 typedef struct float32x4x4_t
475 float32x4_t val[4];
476 } float32x4x4_t;
478 typedef struct poly8x8x4_t
480 poly8x8_t val[4];
481 } poly8x8x4_t;
483 typedef struct poly8x16x4_t
485 poly8x16_t val[4];
486 } poly8x16x4_t;
488 typedef struct poly16x4x4_t
490 poly16x4_t val[4];
491 } poly16x4x4_t;
493 typedef struct poly16x8x4_t
495 poly16x8_t val[4];
496 } poly16x8x4_t;
498 #pragma GCC push_options
499 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
500 typedef struct poly64x1x4_t
502 poly64x1_t val[4];
503 } poly64x1x4_t;
506 typedef struct poly64x2x4_t
508 poly64x2_t val[4];
509 } poly64x2x4_t;
510 #pragma GCC pop_options
512 /* vadd */
/* vadd_<type>: addition on the 64-bit vector types.  Every integer variant
   takes two operands of the same type and returns `__a + __b' written with
   the ordinary `+' operator; all functions are static, always-inline
   wrappers.  */
513 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
514 vadd_s8 (int8x8_t __a, int8x8_t __b)
516 return __a + __b;
519 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
520 vadd_s16 (int16x4_t __a, int16x4_t __b)
522 return __a + __b;
525 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
526 vadd_s32 (int32x2_t __a, int32x2_t __b)
528 return __a + __b;
/* The float variant uses the `+' operator only when __FAST_MATH__ is
   defined; otherwise it calls the __builtin_neon_vaddv2sf builtin.
   NOTE(review): presumably because NEON float arithmetic is not fully
   IEEE-conformant, so the generic operator is only safe under fast-math
   -- confirm.  */
531 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
532 vadd_f32 (float32x2_t __a, float32x2_t __b)
534 #ifdef __FAST_MATH__
535 return __a + __b;
536 #else
537 return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b);
538 #endif
541 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
542 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
544 return __a + __b;
547 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
548 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
550 return __a + __b;
553 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
554 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
556 return __a + __b;
559 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
560 vadd_s64 (int64x1_t __a, int64x1_t __b)
562 return __a + __b;
565 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
566 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
568 return __a + __b;
/* vaddq_<type>: addition on the 128-bit vector types.  Every integer
   variant returns `__a + __b' written with the ordinary `+' operator on two
   operands of the same vector type.  */
571 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
572 vaddq_s8 (int8x16_t __a, int8x16_t __b)
574 return __a + __b;
577 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
578 vaddq_s16 (int16x8_t __a, int16x8_t __b)
580 return __a + __b;
583 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
584 vaddq_s32 (int32x4_t __a, int32x4_t __b)
586 return __a + __b;
589 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
590 vaddq_s64 (int64x2_t __a, int64x2_t __b)
592 return __a + __b;
/* The float variant uses the `+' operator only when __FAST_MATH__ is
   defined; otherwise it calls the __builtin_neon_vaddv4sf builtin.  */
595 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
596 vaddq_f32 (float32x4_t __a, float32x4_t __b)
598 #ifdef __FAST_MATH__
599 return __a + __b;
600 #else
601 return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b);
602 #endif
605 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
606 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
608 return __a + __b;
611 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
612 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
614 return __a + __b;
617 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
618 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
620 return __a + __b;
623 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
624 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
626 return __a + __b;
/* vaddl_<type>: take two 64-bit vectors and return a 128-bit vector with
   the same lane count and lanes twice as wide (e.g. int8x8_t operands,
   int16x8_t result).  Signed variants forward to __builtin_neon_vaddls*,
   unsigned variants to __builtin_neon_vaddlu*; the unsigned wrappers cast
   their operands to the signed vector types before the call and cast the
   result back.
   NOTE(review): by ARM naming this is the "long" (widening) add; the
   arithmetic itself lives in the builtin -- confirm against the ACLE.  */
629 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
630 vaddl_s8 (int8x8_t __a, int8x8_t __b)
632 return (int16x8_t)__builtin_neon_vaddlsv8qi (__a, __b);
635 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
636 vaddl_s16 (int16x4_t __a, int16x4_t __b)
638 return (int32x4_t)__builtin_neon_vaddlsv4hi (__a, __b);
641 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
642 vaddl_s32 (int32x2_t __a, int32x2_t __b)
644 return (int64x2_t)__builtin_neon_vaddlsv2si (__a, __b);
647 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
648 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
650 return (uint16x8_t)__builtin_neon_vaddluv8qi ((int8x8_t) __a, (int8x8_t) __b);
653 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
654 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
656 return (uint32x4_t)__builtin_neon_vaddluv4hi ((int16x4_t) __a, (int16x4_t) __b);
659 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
660 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
662 return (uint64x2_t)__builtin_neon_vaddluv2si ((int32x2_t) __a, (int32x2_t) __b);
/* vaddw_<type>: mixed-width add.  __a is a 128-bit vector, __b a 64-bit
   vector with the same lane count and lanes half as wide; the result has
   the type of __a.  Signed variants forward to __builtin_neon_vaddws*,
   unsigned variants to __builtin_neon_vaddwu* with operands cast to the
   signed vector types and the result cast back.
   NOTE(review): by ARM naming this is the "wide" add (__b widened before
   adding to __a); the arithmetic lives in the builtin -- confirm.  */
665 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
666 vaddw_s8 (int16x8_t __a, int8x8_t __b)
668 return (int16x8_t)__builtin_neon_vaddwsv8qi (__a, __b);
671 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
672 vaddw_s16 (int32x4_t __a, int16x4_t __b)
674 return (int32x4_t)__builtin_neon_vaddwsv4hi (__a, __b);
677 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
678 vaddw_s32 (int64x2_t __a, int32x2_t __b)
680 return (int64x2_t)__builtin_neon_vaddwsv2si (__a, __b);
683 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
684 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
686 return (uint16x8_t)__builtin_neon_vaddwuv8qi ((int16x8_t) __a, (int8x8_t) __b);
689 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
690 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
692 return (uint32x4_t)__builtin_neon_vaddwuv4hi ((int32x4_t) __a, (int16x4_t) __b);
695 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
696 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
698 return (uint64x2_t)__builtin_neon_vaddwuv2si ((int64x2_t) __a, (int32x2_t) __b);
/* vhadd_<type> (64-bit vectors) and vhaddq_<type> (128-bit vectors), for
   8-, 16- and 32-bit signed and unsigned lanes.  Operands and result share
   one vector type.  Signed variants forward to __builtin_neon_vhadds*,
   unsigned variants to __builtin_neon_vhaddu* with operands cast to the
   signed vector types and the result cast back.
   NOTE(review): by ARM naming this is the halving add; the arithmetic
   lives in the builtin -- confirm against the ACLE.  */
701 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
702 vhadd_s8 (int8x8_t __a, int8x8_t __b)
704 return (int8x8_t)__builtin_neon_vhaddsv8qi (__a, __b);
707 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
708 vhadd_s16 (int16x4_t __a, int16x4_t __b)
710 return (int16x4_t)__builtin_neon_vhaddsv4hi (__a, __b);
713 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
714 vhadd_s32 (int32x2_t __a, int32x2_t __b)
716 return (int32x2_t)__builtin_neon_vhaddsv2si (__a, __b);
719 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
720 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
722 return (uint8x8_t)__builtin_neon_vhadduv8qi ((int8x8_t) __a, (int8x8_t) __b);
725 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
726 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
728 return (uint16x4_t)__builtin_neon_vhadduv4hi ((int16x4_t) __a, (int16x4_t) __b);
731 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
732 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
734 return (uint32x2_t)__builtin_neon_vhadduv2si ((int32x2_t) __a, (int32x2_t) __b);
737 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
738 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
740 return (int8x16_t)__builtin_neon_vhaddsv16qi (__a, __b);
743 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
744 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
746 return (int16x8_t)__builtin_neon_vhaddsv8hi (__a, __b);
749 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
750 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
752 return (int32x4_t)__builtin_neon_vhaddsv4si (__a, __b);
755 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
756 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
758 return (uint8x16_t)__builtin_neon_vhadduv16qi ((int8x16_t) __a, (int8x16_t) __b);
761 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
762 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
764 return (uint16x8_t)__builtin_neon_vhadduv8hi ((int16x8_t) __a, (int16x8_t) __b);
767 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
768 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
770 return (uint32x4_t)__builtin_neon_vhadduv4si ((int32x4_t) __a, (int32x4_t) __b);
/* vrhadd_<type> (64-bit vectors) and vrhaddq_<type> (128-bit vectors), for
   8-, 16- and 32-bit signed and unsigned lanes.  Operands and result share
   one vector type.  Signed variants forward to __builtin_neon_vrhadds*,
   unsigned variants to __builtin_neon_vrhaddu* with operands cast to the
   signed vector types and the result cast back.
   NOTE(review): by ARM naming this is the rounding halving add; the
   arithmetic lives in the builtin -- confirm against the ACLE.  */
773 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
774 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
776 return (int8x8_t)__builtin_neon_vrhaddsv8qi (__a, __b);
779 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
780 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
782 return (int16x4_t)__builtin_neon_vrhaddsv4hi (__a, __b);
785 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
786 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
788 return (int32x2_t)__builtin_neon_vrhaddsv2si (__a, __b);
791 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
792 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
794 return (uint8x8_t)__builtin_neon_vrhadduv8qi ((int8x8_t) __a, (int8x8_t) __b);
797 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
798 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
800 return (uint16x4_t)__builtin_neon_vrhadduv4hi ((int16x4_t) __a, (int16x4_t) __b);
803 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
804 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
806 return (uint32x2_t)__builtin_neon_vrhadduv2si ((int32x2_t) __a, (int32x2_t) __b);
809 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
810 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
812 return (int8x16_t)__builtin_neon_vrhaddsv16qi (__a, __b);
815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
816 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
818 return (int16x8_t)__builtin_neon_vrhaddsv8hi (__a, __b);
821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
822 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
824 return (int32x4_t)__builtin_neon_vrhaddsv4si (__a, __b);
827 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
828 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
830 return (uint8x16_t)__builtin_neon_vrhadduv16qi ((int8x16_t) __a, (int8x16_t) __b);
833 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
834 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
836 return (uint16x8_t)__builtin_neon_vrhadduv8hi ((int16x8_t) __a, (int16x8_t) __b);
839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
840 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
842 return (uint32x4_t)__builtin_neon_vrhadduv4si ((int32x4_t) __a, (int32x4_t) __b);
/* vqadd_<type> (64-bit vectors) and vqaddq_<type> (128-bit vectors), for
   8-, 16-, 32- and 64-bit signed and unsigned lanes.  Operands and result
   share one type.  Signed variants forward to __builtin_neon_vqadds*,
   unsigned variants to __builtin_neon_vqaddu* with operands cast to the
   signed types and the result cast back.
   NOTE(review): by ARM naming this is the saturating add; the arithmetic
   lives in the builtin -- confirm against the ACLE.  */
845 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
846 vqadd_s8 (int8x8_t __a, int8x8_t __b)
848 return (int8x8_t)__builtin_neon_vqaddsv8qi (__a, __b);
851 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
852 vqadd_s16 (int16x4_t __a, int16x4_t __b)
854 return (int16x4_t)__builtin_neon_vqaddsv4hi (__a, __b);
857 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
858 vqadd_s32 (int32x2_t __a, int32x2_t __b)
860 return (int32x2_t)__builtin_neon_vqaddsv2si (__a, __b);
863 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
864 vqadd_s64 (int64x1_t __a, int64x1_t __b)
866 return (int64x1_t)__builtin_neon_vqaddsdi (__a, __b);
869 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
870 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
872 return (uint8x8_t)__builtin_neon_vqadduv8qi ((int8x8_t) __a, (int8x8_t) __b);
875 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
876 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
878 return (uint16x4_t)__builtin_neon_vqadduv4hi ((int16x4_t) __a, (int16x4_t) __b);
881 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
882 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
884 return (uint32x2_t)__builtin_neon_vqadduv2si ((int32x2_t) __a, (int32x2_t) __b);
887 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
888 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
890 return (uint64x1_t)__builtin_neon_vqaddudi ((int64x1_t) __a, (int64x1_t) __b);
893 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
894 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
896 return (int8x16_t)__builtin_neon_vqaddsv16qi (__a, __b);
899 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
900 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
902 return (int16x8_t)__builtin_neon_vqaddsv8hi (__a, __b);
905 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
906 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
908 return (int32x4_t)__builtin_neon_vqaddsv4si (__a, __b);
911 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
912 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
914 return (int64x2_t)__builtin_neon_vqaddsv2di (__a, __b);
917 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
918 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
920 return (uint8x16_t)__builtin_neon_vqadduv16qi ((int8x16_t) __a, (int8x16_t) __b);
923 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
924 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
926 return (uint16x8_t)__builtin_neon_vqadduv8hi ((int16x8_t) __a, (int16x8_t) __b);
929 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
930 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
932 return (uint32x4_t)__builtin_neon_vqadduv4si ((int32x4_t) __a, (int32x4_t) __b);
935 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
936 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
938 return (uint64x2_t)__builtin_neon_vqadduv2di ((int64x2_t) __a, (int64x2_t) __b);
/* vaddhn_<type>: take two 128-bit vectors and return a 64-bit vector with
   the same lane count and lanes half as wide (e.g. int16x8_t operands,
   int8x8_t result).  Signed and unsigned variants call the same
   __builtin_neon_vaddhn* builtin; the unsigned wrappers only add casts to
   and from the signed vector types.
   NOTE(review): by ARM naming this is "add, returning the high half" of
   each sum; the arithmetic lives in the builtin -- confirm.  */
941 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
942 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
944 return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b);
947 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
948 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
950 return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b);
953 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
954 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
956 return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b);
959 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
960 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
962 return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
965 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
966 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
968 return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b);
971 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
972 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
974 return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b);
/* vraddhn_<type>: same shapes as vaddhn (two 128-bit operands, 64-bit
   result with lanes half as wide) but forwarding to the
   __builtin_neon_vraddhn* builtins.  Signed and unsigned variants share a
   builtin; the unsigned wrappers only add casts to and from the signed
   vector types.
   NOTE(review): by ARM naming the extra `r' selects rounding; the
   arithmetic lives in the builtin -- confirm.  */
977 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
978 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
980 return (int8x8_t)__builtin_neon_vraddhnv8hi (__a, __b);
983 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
984 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
986 return (int16x4_t)__builtin_neon_vraddhnv4si (__a, __b);
989 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
990 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
992 return (int32x2_t)__builtin_neon_vraddhnv2di (__a, __b);
995 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
996 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
998 return (uint8x8_t)__builtin_neon_vraddhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
1001 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1002 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1004 return (uint16x4_t)__builtin_neon_vraddhnv4si ((int32x4_t) __a, (int32x4_t) __b);
1007 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1008 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1010 return (uint32x2_t)__builtin_neon_vraddhnv2di ((int64x2_t) __a, (int64x2_t) __b);
/* vmul_<type>: multiplication on the 64-bit vector types.  Every integer
   variant returns `__a * __b' written with the ordinary `*' operator on two
   operands of the same vector type.  */
1013 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1014 vmul_s8 (int8x8_t __a, int8x8_t __b)
1016 return __a * __b;
1019 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1020 vmul_s16 (int16x4_t __a, int16x4_t __b)
1022 return __a * __b;
1025 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1026 vmul_s32 (int32x2_t __a, int32x2_t __b)
1028 return __a * __b;
/* The float variant uses the `*' operator only when __FAST_MATH__ is
   defined; otherwise it calls the __builtin_neon_vmulfv2sf builtin.  */
1031 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1032 vmul_f32 (float32x2_t __a, float32x2_t __b)
1034 #ifdef __FAST_MATH__
1035 return __a * __b;
1036 #else
1037 return (float32x2_t) __builtin_neon_vmulfv2sf (__a, __b);
1038 #endif
1042 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1043 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1045 return __a * __b;
1048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1049 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1051 return __a * __b;
1054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1055 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1057 return __a * __b;
/* vmulq_<type>: multiplication on the 128-bit vector types.  Every integer
   variant returns `__a * __b' written with the ordinary `*' operator on two
   operands of the same vector type.  */
1060 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1061 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1063 return __a * __b;
1066 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1067 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1069 return __a * __b;
1072 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1073 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1075 return __a * __b;
/* The float variant uses the `*' operator only when __FAST_MATH__ is
   defined; otherwise it calls the __builtin_neon_vmulfv4sf builtin.  */
1078 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1079 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1081 #ifdef __FAST_MATH__
1082 return __a * __b;
1083 #else
1084 return (float32x4_t) __builtin_neon_vmulfv4sf (__a, __b);
1085 #endif
1088 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1089 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1091 return __a * __b;
1094 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1095 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1097 return __a * __b;
1100 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1101 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1103 return __a * __b;
/* vmul_p8 / vmulq_p8: multiply on the poly8 vector types.  Unlike the
   integer vmul variants these do not use the `*' operator: they forward to
   the __builtin_neon_vmulp* builtins, casting the operands to the signed
   8-bit vector types and the result back to the poly type.
   NOTE(review): by ARM naming this is a polynomial (carry-less) multiply;
   the arithmetic lives in the builtin -- confirm against the ACLE.  */
1106 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1107 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1109 return (poly8x8_t)__builtin_neon_vmulpv8qi ((int8x8_t) __a, (int8x8_t) __b);
1112 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1113 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1115 return (poly8x16_t)__builtin_neon_vmulpv16qi ((int8x16_t) __a, (int8x16_t) __b);
/* vqdmulh_<type> / vqdmulhq_<type>: signed 16- and 32-bit lanes only, on
   64-bit and 128-bit vectors.  Operands and result share one vector type;
   each wrapper forwards both operands to __builtin_neon_vqdmulh*.
   NOTE(review): by ARM naming this is the saturating doubling multiply
   returning the high half; the arithmetic lives in the builtin --
   confirm against the ACLE.  */
1118 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1119 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
1121 return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b);
1124 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1125 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
1127 return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b);
1130 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1131 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
1133 return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b);
1136 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1137 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
1139 return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b);
/* vqrdmulh_<type> / vqrdmulhq_<type>: signed 16- and 32-bit lanes only, on
   64-bit and 128-bit vectors.  Operands and result share one vector type;
   each wrapper forwards both operands to __builtin_neon_vqrdmulh*.
   NOTE(review): by ARM naming this is the rounding form of vqdmulh; the
   arithmetic lives in the builtin -- confirm against the ACLE.  */
1142 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1143 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
1145 return (int16x4_t)__builtin_neon_vqrdmulhv4hi (__a, __b);
1148 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1149 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
1151 return (int32x2_t)__builtin_neon_vqrdmulhv2si (__a, __b);
1154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1155 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
1157 return (int16x8_t)__builtin_neon_vqrdmulhv8hi (__a, __b);
1160 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1161 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
1163 return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b);
/* vqrdmlah* and vqrdmlsh*: three-operand intrinsics on signed 16- and
   32-bit lanes, declared only when __ARM_FEATURE_QRDMX is defined.  All
   three operands and the result share one vector type; each wrapper passes
   (__a, __b, __c) unchanged and in order to the matching
   __builtin_neon_vqrdmlah* / __builtin_neon_vqrdmlsh* builtin.
   NOTE(review): by ARM naming these accumulate into / subtract from __a
   the rounding doubling product of __b and __c, with saturation -- confirm
   against the ACLE.  */
1166 #ifdef __ARM_FEATURE_QRDMX
1167 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1168 vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
1170 return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c);
1173 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1174 vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
1176 return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c);
1179 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1180 vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
1182 return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c);
1185 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1186 vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
1188 return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c);
1191 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1192 vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
1194 return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c);
1197 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1198 vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
1200 return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c);
1203 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1204 vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
1206 return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c);
1209 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1210 vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
1212 return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c);
1214 #endif
/* vmull_<type>: take two 64-bit vectors and return a 128-bit vector with
   the same lane count and lanes twice as wide (e.g. int8x8_t operands,
   int16x8_t result).  Signed variants forward to __builtin_neon_vmulls*,
   unsigned to __builtin_neon_vmullu*, and the poly8 variant to
   __builtin_neon_vmullpv8qi; the unsigned and poly wrappers cast their
   operands to the signed vector types and cast the result back.
   NOTE(review): by ARM naming this is the "long" (widening) multiply; the
   arithmetic lives in the builtin -- confirm against the ACLE.  */
1216 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1217 vmull_s8 (int8x8_t __a, int8x8_t __b)
1219 return (int16x8_t)__builtin_neon_vmullsv8qi (__a, __b);
1222 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1223 vmull_s16 (int16x4_t __a, int16x4_t __b)
1225 return (int32x4_t)__builtin_neon_vmullsv4hi (__a, __b);
1228 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1229 vmull_s32 (int32x2_t __a, int32x2_t __b)
1231 return (int64x2_t)__builtin_neon_vmullsv2si (__a, __b);
1234 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1235 vmull_u8 (uint8x8_t __a, uint8x8_t __b)
1237 return (uint16x8_t)__builtin_neon_vmulluv8qi ((int8x8_t) __a, (int8x8_t) __b);
1240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1241 vmull_u16 (uint16x4_t __a, uint16x4_t __b)
1243 return (uint32x4_t)__builtin_neon_vmulluv4hi ((int16x4_t) __a, (int16x4_t) __b);
1246 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1247 vmull_u32 (uint32x2_t __a, uint32x2_t __b)
1249 return (uint64x2_t)__builtin_neon_vmulluv2si ((int32x2_t) __a, (int32x2_t) __b);
1252 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
1253 vmull_p8 (poly8x8_t __a, poly8x8_t __b)
1255 return (poly16x8_t)__builtin_neon_vmullpv8qi ((int8x8_t) __a, (int8x8_t) __b);
1258 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1259 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
1261 return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b);
1264 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1265 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
1267 return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b);
1270 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1271 vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
1273 return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c);
1276 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1277 vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
1279 return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c);
1282 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1283 vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
1285 return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c);
1288 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1289 vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
1291 return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c);
1294 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1295 vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
1297 return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
1300 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1301 vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
1303 return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
1306 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1307 vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
1309 return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
1312 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1313 vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
1315 return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c);
1318 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1319 vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
1321 return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c);
1324 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1325 vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
1327 return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c);
1330 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1331 vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
1333 return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c);
1336 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1337 vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
1339 return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
1342 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1343 vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
1345 return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
1348 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1349 vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
1351 return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
1354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1355 vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
1357 return (int16x8_t)__builtin_neon_vmlalsv8qi (__a, __b, __c);
1360 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1361 vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
1363 return (int32x4_t)__builtin_neon_vmlalsv4hi (__a, __b, __c);
1366 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1367 vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
1369 return (int64x2_t)__builtin_neon_vmlalsv2si (__a, __b, __c);
1372 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1373 vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
1375 return (uint16x8_t)__builtin_neon_vmlaluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
1378 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1379 vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
1381 return (uint32x4_t)__builtin_neon_vmlaluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
1384 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1385 vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
1387 return (uint64x2_t)__builtin_neon_vmlaluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
1390 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1391 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
1393 return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c);
1396 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1397 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
1399 return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c);
1402 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1403 vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
1405 return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c);
1408 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1409 vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
1411 return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c);
1414 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1415 vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
1417 return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c);
1420 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1421 vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
1423 return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c);
1426 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1427 vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
1429 return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
1432 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1433 vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
1435 return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
1438 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1439 vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
1441 return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
1444 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1445 vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
1447 return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c);
1450 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1451 vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
1453 return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c);
1456 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1457 vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
1459 return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c);
1462 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1463 vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
1465 return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c);
1468 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1469 vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
1471 return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
1474 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1475 vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
1477 return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
1480 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1481 vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
1483 return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
1486 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1487 vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
1489 return (int16x8_t)__builtin_neon_vmlslsv8qi (__a, __b, __c);
1492 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1493 vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
1495 return (int32x4_t)__builtin_neon_vmlslsv4hi (__a, __b, __c);
1498 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1499 vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
1501 return (int64x2_t)__builtin_neon_vmlslsv2si (__a, __b, __c);
1504 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1505 vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
1507 return (uint16x8_t)__builtin_neon_vmlsluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
1510 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1511 vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
1513 return (uint32x4_t)__builtin_neon_vmlsluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
1516 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1517 vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
1519 return (uint64x2_t)__builtin_neon_vmlsluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
1522 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1523 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
1525 return (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c);
1528 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1529 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
1531 return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c);
1534 #pragma GCC push_options
1535 #pragma GCC target ("fpu=neon-vfpv4")
1536 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1537 vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
1539 return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c);
1542 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1543 vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
1545 return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c);
1548 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1549 vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
1551 return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c);
1554 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1555 vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
1557 return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c);
1559 #pragma GCC pop_options
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintnv2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintnv2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintnv4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintnv4sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintav2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintav2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintav4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintav4sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintpv2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintpv2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintpv4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintpv4sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintmv2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintmv2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintmv4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintmv4sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintxv2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintxv2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintxv4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintxv4sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintzv2sf and returns the value as float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  float32x2_t __res = (float32x2_t) __builtin_neon_vrintzv2sf (__a);
  return __res;
}
#endif
#if __ARM_ARCH >= 8
/* Only defined when __ARM_ARCH is at least 8.  Passes __a to
   __builtin_neon_vrintzv4sf and returns the value as float32x4_t.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  float32x4_t __res = (float32x4_t) __builtin_neon_vrintzv4sf (__a);
  return __res;
}
#endif
1661 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1662 vsub_s8 (int8x8_t __a, int8x8_t __b)
1664 return __a - __b;
1667 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1668 vsub_s16 (int16x4_t __a, int16x4_t __b)
1670 return __a - __b;
1673 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1674 vsub_s32 (int32x2_t __a, int32x2_t __b)
1676 return __a - __b;
1679 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1680 vsub_f32 (float32x2_t __a, float32x2_t __b)
1682 #ifdef __FAST_MATH__
1683 return __a - __b;
1684 #else
1685 return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b);
1686 #endif
1689 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1690 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1692 return __a - __b;
1695 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1696 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1698 return __a - __b;
1701 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1702 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1704 return __a - __b;
1707 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1708 vsub_s64 (int64x1_t __a, int64x1_t __b)
1710 return __a - __b;
1713 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1714 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1716 return __a - __b;
1719 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1720 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1722 return __a - __b;
1725 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1726 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1728 return __a - __b;
1731 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1732 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1734 return __a - __b;
1737 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1738 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1740 return __a - __b;
1743 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1744 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1746 #ifdef __FAST_MATH__
1747 return __a - __b;
1748 #else
1749 return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b);
1750 #endif
1753 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1754 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1756 return __a - __b;
1759 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1760 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1762 return __a - __b;
1765 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1766 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1768 return __a - __b;
1771 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1772 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1774 return __a - __b;
1777 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1778 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1780 return (int16x8_t)__builtin_neon_vsublsv8qi (__a, __b);
1783 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1784 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1786 return (int32x4_t)__builtin_neon_vsublsv4hi (__a, __b);
1789 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1790 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1792 return (int64x2_t)__builtin_neon_vsublsv2si (__a, __b);
1795 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1796 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1798 return (uint16x8_t)__builtin_neon_vsubluv8qi ((int8x8_t) __a, (int8x8_t) __b);
1801 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1802 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1804 return (uint32x4_t)__builtin_neon_vsubluv4hi ((int16x4_t) __a, (int16x4_t) __b);
1807 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1808 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1810 return (uint64x2_t)__builtin_neon_vsubluv2si ((int32x2_t) __a, (int32x2_t) __b);
1813 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1814 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1816 return (int16x8_t)__builtin_neon_vsubwsv8qi (__a, __b);
1819 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1820 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1822 return (int32x4_t)__builtin_neon_vsubwsv4hi (__a, __b);
1825 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1826 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1828 return (int64x2_t)__builtin_neon_vsubwsv2si (__a, __b);
1831 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1832 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1834 return (uint16x8_t)__builtin_neon_vsubwuv8qi ((int16x8_t) __a, (int8x8_t) __b);
1837 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1838 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
1840 return (uint32x4_t)__builtin_neon_vsubwuv4hi ((int32x4_t) __a, (int16x4_t) __b);
1843 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1844 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
1846 return (uint64x2_t)__builtin_neon_vsubwuv2si ((int64x2_t) __a, (int32x2_t) __b);
1849 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1850 vhsub_s8 (int8x8_t __a, int8x8_t __b)
1852 return (int8x8_t)__builtin_neon_vhsubsv8qi (__a, __b);
1855 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1856 vhsub_s16 (int16x4_t __a, int16x4_t __b)
1858 return (int16x4_t)__builtin_neon_vhsubsv4hi (__a, __b);
1861 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1862 vhsub_s32 (int32x2_t __a, int32x2_t __b)
1864 return (int32x2_t)__builtin_neon_vhsubsv2si (__a, __b);
1867 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1868 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
1870 return (uint8x8_t)__builtin_neon_vhsubuv8qi ((int8x8_t) __a, (int8x8_t) __b);
1873 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1874 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
1876 return (uint16x4_t)__builtin_neon_vhsubuv4hi ((int16x4_t) __a, (int16x4_t) __b);
1879 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1880 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
1882 return (uint32x2_t)__builtin_neon_vhsubuv2si ((int32x2_t) __a, (int32x2_t) __b);
1885 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1886 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
1888 return (int8x16_t)__builtin_neon_vhsubsv16qi (__a, __b);
1891 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1892 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
1894 return (int16x8_t)__builtin_neon_vhsubsv8hi (__a, __b);
1897 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1898 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
1900 return (int32x4_t)__builtin_neon_vhsubsv4si (__a, __b);
1903 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1904 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1906 return (uint8x16_t)__builtin_neon_vhsubuv16qi ((int8x16_t) __a, (int8x16_t) __b);
1909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1910 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1912 return (uint16x8_t)__builtin_neon_vhsubuv8hi ((int16x8_t) __a, (int16x8_t) __b);
1915 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1916 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1918 return (uint32x4_t)__builtin_neon_vhsubuv4si ((int32x4_t) __a, (int32x4_t) __b);
1921 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1922 vqsub_s8 (int8x8_t __a, int8x8_t __b)
1924 return (int8x8_t)__builtin_neon_vqsubsv8qi (__a, __b);
1927 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1928 vqsub_s16 (int16x4_t __a, int16x4_t __b)
1930 return (int16x4_t)__builtin_neon_vqsubsv4hi (__a, __b);
1933 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1934 vqsub_s32 (int32x2_t __a, int32x2_t __b)
1936 return (int32x2_t)__builtin_neon_vqsubsv2si (__a, __b);
1939 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1940 vqsub_s64 (int64x1_t __a, int64x1_t __b)
1942 return (int64x1_t)__builtin_neon_vqsubsdi (__a, __b);
1945 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1946 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
1948 return (uint8x8_t)__builtin_neon_vqsubuv8qi ((int8x8_t) __a, (int8x8_t) __b);
1951 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1952 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
1954 return (uint16x4_t)__builtin_neon_vqsubuv4hi ((int16x4_t) __a, (int16x4_t) __b);
1957 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1958 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
1960 return (uint32x2_t)__builtin_neon_vqsubuv2si ((int32x2_t) __a, (int32x2_t) __b);
1963 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1964 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
1966 return (uint64x1_t)__builtin_neon_vqsubudi ((int64x1_t) __a, (int64x1_t) __b);
1969 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1970 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
1972 return (int8x16_t)__builtin_neon_vqsubsv16qi (__a, __b);
1975 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1976 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
1978 return (int16x8_t)__builtin_neon_vqsubsv8hi (__a, __b);
1981 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1982 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
1984 return (int32x4_t)__builtin_neon_vqsubsv4si (__a, __b);
1987 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1988 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
1990 return (int64x2_t)__builtin_neon_vqsubsv2di (__a, __b);
1993 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1994 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1996 return (uint8x16_t)__builtin_neon_vqsubuv16qi ((int8x16_t) __a, (int8x16_t) __b);
1999 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2000 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2002 return (uint16x8_t)__builtin_neon_vqsubuv8hi ((int16x8_t) __a, (int16x8_t) __b);
2005 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2006 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2008 return (uint32x4_t)__builtin_neon_vqsubuv4si ((int32x4_t) __a, (int32x4_t) __b);
2011 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2012 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2014 return (uint64x2_t)__builtin_neon_vqsubuv2di ((int64x2_t) __a, (int64x2_t) __b);
2017 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2018 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
2020 return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b);
2023 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2024 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
2026 return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b);
2029 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2030 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
2032 return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b);
2035 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2036 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2038 return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
2041 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2042 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2044 return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b);
2047 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2048 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2050 return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b);
2053 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2054 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
2056 return (int8x8_t)__builtin_neon_vrsubhnv8hi (__a, __b);
2059 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2060 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
2062 return (int16x4_t)__builtin_neon_vrsubhnv4si (__a, __b);
2065 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2066 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
2068 return (int32x2_t)__builtin_neon_vrsubhnv2di (__a, __b);
2071 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2072 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2074 return (uint8x8_t)__builtin_neon_vrsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b);
2077 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2078 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2080 return (uint16x4_t)__builtin_neon_vrsubhnv4si ((int32x4_t) __a, (int32x4_t) __b);
2083 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2084 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2086 return (uint32x2_t)__builtin_neon_vrsubhnv2di ((int64x2_t) __a, (int64x2_t) __b);
2089 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2090 vceq_s8 (int8x8_t __a, int8x8_t __b)
2092 return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b);
2095 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2096 vceq_s16 (int16x4_t __a, int16x4_t __b)
2098 return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b);
2101 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2102 vceq_s32 (int32x2_t __a, int32x2_t __b)
2104 return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b);
2107 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2108 vceq_f32 (float32x2_t __a, float32x2_t __b)
2110 return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b);
2113 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2114 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
2116 return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b);
2119 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2120 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
2122 return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b);
2125 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2126 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
2128 return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b);
2131 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2132 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
2134 return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b);
2137 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2138 vceqq_s8 (int8x16_t __a, int8x16_t __b)
2140 return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b);
2143 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2144 vceqq_s16 (int16x8_t __a, int16x8_t __b)
2146 return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b);
2149 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2150 vceqq_s32 (int32x4_t __a, int32x4_t __b)
2152 return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b);
2155 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2156 vceqq_f32 (float32x4_t __a, float32x4_t __b)
2158 return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b);
2161 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2162 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
2164 return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b);
2167 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2168 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
2170 return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b);
2173 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2174 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
2176 return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b);
2179 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2180 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
2182 return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b);
2185 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2186 vcge_s8 (int8x8_t __a, int8x8_t __b)
2188 return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b);
2191 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2192 vcge_s16 (int16x4_t __a, int16x4_t __b)
2194 return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b);
2197 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2198 vcge_s32 (int32x2_t __a, int32x2_t __b)
2200 return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b);
2203 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2204 vcge_f32 (float32x2_t __a, float32x2_t __b)
2206 return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b);
2209 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2210 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
2212 return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b);
2215 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2216 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
2218 return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b);
2221 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2222 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
2224 return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b);
2227 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2228 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
2230 return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b);
2233 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2234 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
2236 return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b);
2239 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2240 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
2242 return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b);
2245 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2246 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
2248 return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b);
2251 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2252 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
2254 return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b);
2257 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2258 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
2260 return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b);
2263 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2264 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
2266 return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b);
2269 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2270 vcle_s8 (int8x8_t __a, int8x8_t __b)
2272 return (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a);
2275 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2276 vcle_s16 (int16x4_t __a, int16x4_t __b)
2278 return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a);
2281 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2282 vcle_s32 (int32x2_t __a, int32x2_t __b)
2284 return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a);
2287 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2288 vcle_f32 (float32x2_t __a, float32x2_t __b)
2290 return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a);
2293 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2294 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
2296 return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a);
2299 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2300 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
2302 return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a);
2305 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2306 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
2308 return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a);
2311 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2312 vcleq_s8 (int8x16_t __a, int8x16_t __b)
2314 return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a);
2317 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2318 vcleq_s16 (int16x8_t __a, int16x8_t __b)
2320 return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a);
2323 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2324 vcleq_s32 (int32x4_t __a, int32x4_t __b)
2326 return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a);
2329 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2330 vcleq_f32 (float32x4_t __a, float32x4_t __b)
2332 return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a);
2335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2336 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
2338 return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a);
2341 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2342 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
2344 return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a);
2347 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2348 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
2350 return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a);
2353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2354 vcgt_s8 (int8x8_t __a, int8x8_t __b)
2356 return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b);
2359 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2360 vcgt_s16 (int16x4_t __a, int16x4_t __b)
2362 return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b);
2365 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2366 vcgt_s32 (int32x2_t __a, int32x2_t __b)
2368 return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b);
2371 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2372 vcgt_f32 (float32x2_t __a, float32x2_t __b)
2374 return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b);
2377 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2378 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
2380 return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b);
2383 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2384 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
2386 return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b);
2389 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2390 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
2392 return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b);
2395 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2396 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
2398 return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b);
2401 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2402 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
2404 return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b);
2407 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2408 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
2410 return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b);
2413 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2414 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
2416 return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b);
2419 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2420 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
2422 return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b);
2425 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2426 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
2428 return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b);
2431 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2432 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
2434 return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b);
2437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2438 vclt_s8 (int8x8_t __a, int8x8_t __b)
2440 return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a);
2443 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2444 vclt_s16 (int16x4_t __a, int16x4_t __b)
2446 return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a);
2449 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2450 vclt_s32 (int32x2_t __a, int32x2_t __b)
2452 return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a);
2455 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2456 vclt_f32 (float32x2_t __a, float32x2_t __b)
2458 return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a);
2461 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2462 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
2464 return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a);
2467 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2468 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
2470 return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a);
2473 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2474 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
2476 return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a);
2479 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2480 vcltq_s8 (int8x16_t __a, int8x16_t __b)
2482 return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a);
2485 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2486 vcltq_s16 (int16x8_t __a, int16x8_t __b)
2488 return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a);
2491 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2492 vcltq_s32 (int32x4_t __a, int32x4_t __b)
2494 return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a);
2497 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2498 vcltq_f32 (float32x4_t __a, float32x4_t __b)
2500 return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a);
2503 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2504 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
2506 return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a);
2509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2510 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
2512 return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a);
2515 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2516 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
2518 return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a);
2521 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2522 vcage_f32 (float32x2_t __a, float32x2_t __b)
2524 return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b);
2527 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2528 vcageq_f32 (float32x4_t __a, float32x4_t __b)
2530 return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b);
2533 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2534 vcale_f32 (float32x2_t __a, float32x2_t __b)
2536 return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a);
2539 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2540 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
2542 return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a);
2545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2546 vcagt_f32 (float32x2_t __a, float32x2_t __b)
2548 return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b);
2551 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2552 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
2554 return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b);
2557 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2558 vcalt_f32 (float32x2_t __a, float32x2_t __b)
2560 return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a);
2563 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2564 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
2566 return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a);
2569 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2570 vtst_s8 (int8x8_t __a, int8x8_t __b)
2572 return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b);
2575 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2576 vtst_s16 (int16x4_t __a, int16x4_t __b)
2578 return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b);
2581 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2582 vtst_s32 (int32x2_t __a, int32x2_t __b)
2584 return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b);
2587 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2588 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
2590 return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b);
2593 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2594 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
2596 return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b);
2599 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2600 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
2602 return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b);
2605 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2606 vtst_p8 (poly8x8_t __a, poly8x8_t __b)
2608 return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b);
2611 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2612 vtst_p16 (poly16x4_t __a, poly16x4_t __b)
2614 return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b);
2617 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2618 vtstq_s8 (int8x16_t __a, int8x16_t __b)
2620 return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b);
2623 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2624 vtstq_s16 (int16x8_t __a, int16x8_t __b)
2626 return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b);
2629 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2630 vtstq_s32 (int32x4_t __a, int32x4_t __b)
2632 return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b);
2635 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2636 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
2638 return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b);
2641 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2642 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
2644 return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b);
2647 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2648 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
2650 return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b);
2653 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2654 vtstq_p8 (poly8x16_t __a, poly8x16_t __b)
2656 return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b);
2659 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2660 vtstq_p16 (poly16x8_t __a, poly16x8_t __b)
2662 return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b);
2665 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2666 vabd_s8 (int8x8_t __a, int8x8_t __b)
2668 return (int8x8_t)__builtin_neon_vabdsv8qi (__a, __b);
2671 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2672 vabd_s16 (int16x4_t __a, int16x4_t __b)
2674 return (int16x4_t)__builtin_neon_vabdsv4hi (__a, __b);
2677 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2678 vabd_s32 (int32x2_t __a, int32x2_t __b)
2680 return (int32x2_t)__builtin_neon_vabdsv2si (__a, __b);
2683 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2684 vabd_f32 (float32x2_t __a, float32x2_t __b)
2686 return (float32x2_t)__builtin_neon_vabdfv2sf (__a, __b);
2689 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2690 vabd_u8 (uint8x8_t __a, uint8x8_t __b)
2692 return (uint8x8_t)__builtin_neon_vabduv8qi ((int8x8_t) __a, (int8x8_t) __b);
2695 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2696 vabd_u16 (uint16x4_t __a, uint16x4_t __b)
2698 return (uint16x4_t)__builtin_neon_vabduv4hi ((int16x4_t) __a, (int16x4_t) __b);
2701 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2702 vabd_u32 (uint32x2_t __a, uint32x2_t __b)
2704 return (uint32x2_t)__builtin_neon_vabduv2si ((int32x2_t) __a, (int32x2_t) __b);
2707 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2708 vabdq_s8 (int8x16_t __a, int8x16_t __b)
2710 return (int8x16_t)__builtin_neon_vabdsv16qi (__a, __b);
2713 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2714 vabdq_s16 (int16x8_t __a, int16x8_t __b)
2716 return (int16x8_t)__builtin_neon_vabdsv8hi (__a, __b);
2719 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2720 vabdq_s32 (int32x4_t __a, int32x4_t __b)
2722 return (int32x4_t)__builtin_neon_vabdsv4si (__a, __b);
2725 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2726 vabdq_f32 (float32x4_t __a, float32x4_t __b)
2728 return (float32x4_t)__builtin_neon_vabdfv4sf (__a, __b);
2731 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2732 vabdq_u8 (uint8x16_t __a, uint8x16_t __b)
2734 return (uint8x16_t)__builtin_neon_vabduv16qi ((int8x16_t) __a, (int8x16_t) __b);
2737 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2738 vabdq_u16 (uint16x8_t __a, uint16x8_t __b)
2740 return (uint16x8_t)__builtin_neon_vabduv8hi ((int16x8_t) __a, (int16x8_t) __b);
2743 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2744 vabdq_u32 (uint32x4_t __a, uint32x4_t __b)
2746 return (uint32x4_t)__builtin_neon_vabduv4si ((int32x4_t) __a, (int32x4_t) __b);
2749 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2750 vabdl_s8 (int8x8_t __a, int8x8_t __b)
2752 return (int16x8_t)__builtin_neon_vabdlsv8qi (__a, __b);
2755 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2756 vabdl_s16 (int16x4_t __a, int16x4_t __b)
2758 return (int32x4_t)__builtin_neon_vabdlsv4hi (__a, __b);
2761 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2762 vabdl_s32 (int32x2_t __a, int32x2_t __b)
2764 return (int64x2_t)__builtin_neon_vabdlsv2si (__a, __b);
2767 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2768 vabdl_u8 (uint8x8_t __a, uint8x8_t __b)
2770 return (uint16x8_t)__builtin_neon_vabdluv8qi ((int8x8_t) __a, (int8x8_t) __b);
2773 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2774 vabdl_u16 (uint16x4_t __a, uint16x4_t __b)
2776 return (uint32x4_t)__builtin_neon_vabdluv4hi ((int16x4_t) __a, (int16x4_t) __b);
2779 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2780 vabdl_u32 (uint32x2_t __a, uint32x2_t __b)
2782 return (uint64x2_t)__builtin_neon_vabdluv2si ((int32x2_t) __a, (int32x2_t) __b);
2785 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2786 vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
2788 return (int8x8_t)__builtin_neon_vabasv8qi (__a, __b, __c);
2791 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2792 vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
2794 return (int16x4_t)__builtin_neon_vabasv4hi (__a, __b, __c);
2797 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2798 vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
2800 return (int32x2_t)__builtin_neon_vabasv2si (__a, __b, __c);
2803 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2804 vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
2806 return (uint8x8_t)__builtin_neon_vabauv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
2809 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2810 vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
2812 return (uint16x4_t)__builtin_neon_vabauv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
2815 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2816 vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
2818 return (uint32x2_t)__builtin_neon_vabauv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
2821 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2822 vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
2824 return (int8x16_t)__builtin_neon_vabasv16qi (__a, __b, __c);
2827 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2828 vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
2830 return (int16x8_t)__builtin_neon_vabasv8hi (__a, __b, __c);
2833 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2834 vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
2836 return (int32x4_t)__builtin_neon_vabasv4si (__a, __b, __c);
2839 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2840 vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
2842 return (uint8x16_t)__builtin_neon_vabauv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
2845 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2846 vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
2848 return (uint16x8_t)__builtin_neon_vabauv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
2851 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2852 vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
2854 return (uint32x4_t)__builtin_neon_vabauv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
2857 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2858 vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
2860 return (int16x8_t)__builtin_neon_vabalsv8qi (__a, __b, __c);
2863 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2864 vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
2866 return (int32x4_t)__builtin_neon_vabalsv4hi (__a, __b, __c);
2869 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2870 vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
2872 return (int64x2_t)__builtin_neon_vabalsv2si (__a, __b, __c);
2875 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2876 vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
2878 return (uint16x8_t)__builtin_neon_vabaluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
2881 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2882 vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
2884 return (uint32x4_t)__builtin_neon_vabaluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
2887 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2888 vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
2890 return (uint64x2_t)__builtin_neon_vabaluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
2893 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2894 vmax_s8 (int8x8_t __a, int8x8_t __b)
2896 return (int8x8_t)__builtin_neon_vmaxsv8qi (__a, __b);
2899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2900 vmax_s16 (int16x4_t __a, int16x4_t __b)
2902 return (int16x4_t)__builtin_neon_vmaxsv4hi (__a, __b);
2905 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2906 vmax_s32 (int32x2_t __a, int32x2_t __b)
2908 return (int32x2_t)__builtin_neon_vmaxsv2si (__a, __b);
2911 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2912 vmax_f32 (float32x2_t __a, float32x2_t __b)
2914 return (float32x2_t)__builtin_neon_vmaxfv2sf (__a, __b);
2917 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2918 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
2920 return (uint8x8_t)__builtin_neon_vmaxuv8qi ((int8x8_t) __a, (int8x8_t) __b);
2923 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2924 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
2926 return (uint16x4_t)__builtin_neon_vmaxuv4hi ((int16x4_t) __a, (int16x4_t) __b);
2929 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2930 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
2932 return (uint32x2_t)__builtin_neon_vmaxuv2si ((int32x2_t) __a, (int32x2_t) __b);
2935 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2936 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
2938 return (int8x16_t)__builtin_neon_vmaxsv16qi (__a, __b);
2941 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2942 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
2944 return (int16x8_t)__builtin_neon_vmaxsv8hi (__a, __b);
2947 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2948 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
2950 return (int32x4_t)__builtin_neon_vmaxsv4si (__a, __b);
2953 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2954 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
2956 return (float32x4_t)__builtin_neon_vmaxfv4sf (__a, __b);
2959 #pragma GCC push_options
2960 #pragma GCC target ("fpu=neon-fp-armv8")
2961 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2962 vmaxnm_f32 (float32x2_t a, float32x2_t b)
2964 return (float32x2_t)__builtin_neon_vmaxnmv2sf (a, b);
2967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2968 vmaxnmq_f32 (float32x4_t a, float32x4_t b)
2970 return (float32x4_t)__builtin_neon_vmaxnmv4sf (a, b);
2973 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2974 vminnm_f32 (float32x2_t a, float32x2_t b)
2976 return (float32x2_t)__builtin_neon_vminnmv2sf (a, b);
2979 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2980 vminnmq_f32 (float32x4_t a, float32x4_t b)
2982 return (float32x4_t)__builtin_neon_vminnmv4sf (a, b);
2984 #pragma GCC pop_options
2987 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2988 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
2990 return (uint8x16_t)__builtin_neon_vmaxuv16qi ((int8x16_t) __a, (int8x16_t) __b);
2993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2994 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
2996 return (uint16x8_t)__builtin_neon_vmaxuv8hi ((int16x8_t) __a, (int16x8_t) __b);
2999 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3000 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
3002 return (uint32x4_t)__builtin_neon_vmaxuv4si ((int32x4_t) __a, (int32x4_t) __b);
3005 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3006 vmin_s8 (int8x8_t __a, int8x8_t __b)
3008 return (int8x8_t)__builtin_neon_vminsv8qi (__a, __b);
3011 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3012 vmin_s16 (int16x4_t __a, int16x4_t __b)
3014 return (int16x4_t)__builtin_neon_vminsv4hi (__a, __b);
3017 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3018 vmin_s32 (int32x2_t __a, int32x2_t __b)
3020 return (int32x2_t)__builtin_neon_vminsv2si (__a, __b);
3023 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3024 vmin_f32 (float32x2_t __a, float32x2_t __b)
3026 return (float32x2_t)__builtin_neon_vminfv2sf (__a, __b);
3029 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3030 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
3032 return (uint8x8_t)__builtin_neon_vminuv8qi ((int8x8_t) __a, (int8x8_t) __b);
3035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3036 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
3038 return (uint16x4_t)__builtin_neon_vminuv4hi ((int16x4_t) __a, (int16x4_t) __b);
3041 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3042 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
3044 return (uint32x2_t)__builtin_neon_vminuv2si ((int32x2_t) __a, (int32x2_t) __b);
3047 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3048 vminq_s8 (int8x16_t __a, int8x16_t __b)
3050 return (int8x16_t)__builtin_neon_vminsv16qi (__a, __b);
3053 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3054 vminq_s16 (int16x8_t __a, int16x8_t __b)
3056 return (int16x8_t)__builtin_neon_vminsv8hi (__a, __b);
3059 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3060 vminq_s32 (int32x4_t __a, int32x4_t __b)
3062 return (int32x4_t)__builtin_neon_vminsv4si (__a, __b);
3065 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3066 vminq_f32 (float32x4_t __a, float32x4_t __b)
3068 return (float32x4_t)__builtin_neon_vminfv4sf (__a, __b);
3071 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3072 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
3074 return (uint8x16_t)__builtin_neon_vminuv16qi ((int8x16_t) __a, (int8x16_t) __b);
3077 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3078 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
3080 return (uint16x8_t)__builtin_neon_vminuv8hi ((int16x8_t) __a, (int16x8_t) __b);
3083 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3084 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
3086 return (uint32x4_t)__builtin_neon_vminuv4si ((int32x4_t) __a, (int32x4_t) __b);
3089 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3090 vpadd_s8 (int8x8_t __a, int8x8_t __b)
3092 return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b);
3095 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3096 vpadd_s16 (int16x4_t __a, int16x4_t __b)
3098 return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b);
3101 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3102 vpadd_s32 (int32x2_t __a, int32x2_t __b)
3104 return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b);
3107 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3108 vpadd_f32 (float32x2_t __a, float32x2_t __b)
3110 return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b);
3113 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3114 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
3116 return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b);
3119 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3120 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
3122 return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b);
3125 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3126 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
3128 return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b);
3131 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3132 vpaddl_s8 (int8x8_t __a)
3134 return (int16x4_t)__builtin_neon_vpaddlsv8qi (__a);
3137 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3138 vpaddl_s16 (int16x4_t __a)
3140 return (int32x2_t)__builtin_neon_vpaddlsv4hi (__a);
3143 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3144 vpaddl_s32 (int32x2_t __a)
3146 return (int64x1_t)__builtin_neon_vpaddlsv2si (__a);
3149 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3150 vpaddl_u8 (uint8x8_t __a)
3152 return (uint16x4_t)__builtin_neon_vpaddluv8qi ((int8x8_t) __a);
3155 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3156 vpaddl_u16 (uint16x4_t __a)
3158 return (uint32x2_t)__builtin_neon_vpaddluv4hi ((int16x4_t) __a);
3161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3162 vpaddl_u32 (uint32x2_t __a)
3164 return (uint64x1_t)__builtin_neon_vpaddluv2si ((int32x2_t) __a);
3167 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3168 vpaddlq_s8 (int8x16_t __a)
3170 return (int16x8_t)__builtin_neon_vpaddlsv16qi (__a);
3173 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3174 vpaddlq_s16 (int16x8_t __a)
3176 return (int32x4_t)__builtin_neon_vpaddlsv8hi (__a);
3179 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3180 vpaddlq_s32 (int32x4_t __a)
3182 return (int64x2_t)__builtin_neon_vpaddlsv4si (__a);
3185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3186 vpaddlq_u8 (uint8x16_t __a)
3188 return (uint16x8_t)__builtin_neon_vpaddluv16qi ((int8x16_t) __a);
3191 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3192 vpaddlq_u16 (uint16x8_t __a)
3194 return (uint32x4_t)__builtin_neon_vpaddluv8hi ((int16x8_t) __a);
3197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3198 vpaddlq_u32 (uint32x4_t __a)
3200 return (uint64x2_t)__builtin_neon_vpaddluv4si ((int32x4_t) __a);
3203 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3204 vpadal_s8 (int16x4_t __a, int8x8_t __b)
3206 return (int16x4_t)__builtin_neon_vpadalsv8qi (__a, __b);
3209 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3210 vpadal_s16 (int32x2_t __a, int16x4_t __b)
3212 return (int32x2_t)__builtin_neon_vpadalsv4hi (__a, __b);
3215 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3216 vpadal_s32 (int64x1_t __a, int32x2_t __b)
3218 return (int64x1_t)__builtin_neon_vpadalsv2si (__a, __b);
3221 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3222 vpadal_u8 (uint16x4_t __a, uint8x8_t __b)
3224 return (uint16x4_t)__builtin_neon_vpadaluv8qi ((int16x4_t) __a, (int8x8_t) __b);
3227 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3228 vpadal_u16 (uint32x2_t __a, uint16x4_t __b)
3230 return (uint32x2_t)__builtin_neon_vpadaluv4hi ((int32x2_t) __a, (int16x4_t) __b);
3233 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3234 vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
3236 return (uint64x1_t)__builtin_neon_vpadaluv2si ((int64x1_t) __a, (int32x2_t) __b);
3239 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3240 vpadalq_s8 (int16x8_t __a, int8x16_t __b)
3242 return (int16x8_t)__builtin_neon_vpadalsv16qi (__a, __b);
3245 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3246 vpadalq_s16 (int32x4_t __a, int16x8_t __b)
3248 return (int32x4_t)__builtin_neon_vpadalsv8hi (__a, __b);
3251 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3252 vpadalq_s32 (int64x2_t __a, int32x4_t __b)
3254 return (int64x2_t)__builtin_neon_vpadalsv4si (__a, __b);
3257 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3258 vpadalq_u8 (uint16x8_t __a, uint8x16_t __b)
3260 return (uint16x8_t)__builtin_neon_vpadaluv16qi ((int16x8_t) __a, (int8x16_t) __b);
3263 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3264 vpadalq_u16 (uint32x4_t __a, uint16x8_t __b)
3266 return (uint32x4_t)__builtin_neon_vpadaluv8hi ((int32x4_t) __a, (int16x8_t) __b);
3269 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3270 vpadalq_u32 (uint64x2_t __a, uint32x4_t __b)
3272 return (uint64x2_t)__builtin_neon_vpadaluv4si ((int64x2_t) __a, (int32x4_t) __b);
3275 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3276 vpmax_s8 (int8x8_t __a, int8x8_t __b)
3278 return (int8x8_t)__builtin_neon_vpmaxsv8qi (__a, __b);
3281 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3282 vpmax_s16 (int16x4_t __a, int16x4_t __b)
3284 return (int16x4_t)__builtin_neon_vpmaxsv4hi (__a, __b);
3287 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3288 vpmax_s32 (int32x2_t __a, int32x2_t __b)
3290 return (int32x2_t)__builtin_neon_vpmaxsv2si (__a, __b);
3293 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3294 vpmax_f32 (float32x2_t __a, float32x2_t __b)
3296 return (float32x2_t)__builtin_neon_vpmaxfv2sf (__a, __b);
3299 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3300 vpmax_u8 (uint8x8_t __a, uint8x8_t __b)
3302 return (uint8x8_t)__builtin_neon_vpmaxuv8qi ((int8x8_t) __a, (int8x8_t) __b);
3305 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3306 vpmax_u16 (uint16x4_t __a, uint16x4_t __b)
3308 return (uint16x4_t)__builtin_neon_vpmaxuv4hi ((int16x4_t) __a, (int16x4_t) __b);
3311 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3312 vpmax_u32 (uint32x2_t __a, uint32x2_t __b)
3314 return (uint32x2_t)__builtin_neon_vpmaxuv2si ((int32x2_t) __a, (int32x2_t) __b);
3317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3318 vpmin_s8 (int8x8_t __a, int8x8_t __b)
3320 return (int8x8_t)__builtin_neon_vpminsv8qi (__a, __b);
3323 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3324 vpmin_s16 (int16x4_t __a, int16x4_t __b)
3326 return (int16x4_t)__builtin_neon_vpminsv4hi (__a, __b);
3329 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3330 vpmin_s32 (int32x2_t __a, int32x2_t __b)
3332 return (int32x2_t)__builtin_neon_vpminsv2si (__a, __b);
3335 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3336 vpmin_f32 (float32x2_t __a, float32x2_t __b)
3338 return (float32x2_t)__builtin_neon_vpminfv2sf (__a, __b);
3341 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3342 vpmin_u8 (uint8x8_t __a, uint8x8_t __b)
3344 return (uint8x8_t)__builtin_neon_vpminuv8qi ((int8x8_t) __a, (int8x8_t) __b);
3347 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3348 vpmin_u16 (uint16x4_t __a, uint16x4_t __b)
3350 return (uint16x4_t)__builtin_neon_vpminuv4hi ((int16x4_t) __a, (int16x4_t) __b);
3353 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3354 vpmin_u32 (uint32x2_t __a, uint32x2_t __b)
3356 return (uint32x2_t)__builtin_neon_vpminuv2si ((int32x2_t) __a, (int32x2_t) __b);
3359 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3360 vrecps_f32 (float32x2_t __a, float32x2_t __b)
3362 return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b);
3365 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3366 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
3368 return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b);
3371 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3372 vrsqrts_f32 (float32x2_t __a, float32x2_t __b)
3374 return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b);
3377 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3378 vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b)
3380 return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b);
3383 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3384 vshl_s8 (int8x8_t __a, int8x8_t __b)
3386 return (int8x8_t)__builtin_neon_vshlsv8qi (__a, __b);
3389 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3390 vshl_s16 (int16x4_t __a, int16x4_t __b)
3392 return (int16x4_t)__builtin_neon_vshlsv4hi (__a, __b);
3395 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3396 vshl_s32 (int32x2_t __a, int32x2_t __b)
3398 return (int32x2_t)__builtin_neon_vshlsv2si (__a, __b);
3401 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3402 vshl_s64 (int64x1_t __a, int64x1_t __b)
3404 return (int64x1_t)__builtin_neon_vshlsdi (__a, __b);
3407 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3408 vshl_u8 (uint8x8_t __a, int8x8_t __b)
3410 return (uint8x8_t)__builtin_neon_vshluv8qi ((int8x8_t) __a, __b);
3413 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3414 vshl_u16 (uint16x4_t __a, int16x4_t __b)
3416 return (uint16x4_t)__builtin_neon_vshluv4hi ((int16x4_t) __a, __b);
3419 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3420 vshl_u32 (uint32x2_t __a, int32x2_t __b)
3422 return (uint32x2_t)__builtin_neon_vshluv2si ((int32x2_t) __a, __b);
3425 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3426 vshl_u64 (uint64x1_t __a, int64x1_t __b)
3428 return (uint64x1_t)__builtin_neon_vshludi ((int64x1_t) __a, __b);
3431 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3432 vshlq_s8 (int8x16_t __a, int8x16_t __b)
3434 return (int8x16_t)__builtin_neon_vshlsv16qi (__a, __b);
3437 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3438 vshlq_s16 (int16x8_t __a, int16x8_t __b)
3440 return (int16x8_t)__builtin_neon_vshlsv8hi (__a, __b);
3443 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3444 vshlq_s32 (int32x4_t __a, int32x4_t __b)
3446 return (int32x4_t)__builtin_neon_vshlsv4si (__a, __b);
3449 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3450 vshlq_s64 (int64x2_t __a, int64x2_t __b)
3452 return (int64x2_t)__builtin_neon_vshlsv2di (__a, __b);
3455 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3456 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
3458 return (uint8x16_t)__builtin_neon_vshluv16qi ((int8x16_t) __a, __b);
3461 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3462 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
3464 return (uint16x8_t)__builtin_neon_vshluv8hi ((int16x8_t) __a, __b);
3467 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3468 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
3470 return (uint32x4_t)__builtin_neon_vshluv4si ((int32x4_t) __a, __b);
3473 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3474 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
3476 return (uint64x2_t)__builtin_neon_vshluv2di ((int64x2_t) __a, __b);
3479 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3480 vrshl_s8 (int8x8_t __a, int8x8_t __b)
3482 return (int8x8_t)__builtin_neon_vrshlsv8qi (__a, __b);
3485 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3486 vrshl_s16 (int16x4_t __a, int16x4_t __b)
3488 return (int16x4_t)__builtin_neon_vrshlsv4hi (__a, __b);
3491 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3492 vrshl_s32 (int32x2_t __a, int32x2_t __b)
3494 return (int32x2_t)__builtin_neon_vrshlsv2si (__a, __b);
3497 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3498 vrshl_s64 (int64x1_t __a, int64x1_t __b)
3500 return (int64x1_t)__builtin_neon_vrshlsdi (__a, __b);
3503 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3504 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
3506 return (uint8x8_t)__builtin_neon_vrshluv8qi ((int8x8_t) __a, __b);
3509 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3510 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
3512 return (uint16x4_t)__builtin_neon_vrshluv4hi ((int16x4_t) __a, __b);
3515 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3516 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
3518 return (uint32x2_t)__builtin_neon_vrshluv2si ((int32x2_t) __a, __b);
3521 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3522 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
3524 return (uint64x1_t)__builtin_neon_vrshludi ((int64x1_t) __a, __b);
3527 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3528 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
3530 return (int8x16_t)__builtin_neon_vrshlsv16qi (__a, __b);
3533 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3534 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
3536 return (int16x8_t)__builtin_neon_vrshlsv8hi (__a, __b);
3539 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3540 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
3542 return (int32x4_t)__builtin_neon_vrshlsv4si (__a, __b);
3545 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3546 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
3548 return (int64x2_t)__builtin_neon_vrshlsv2di (__a, __b);
3551 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3552 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
3554 return (uint8x16_t)__builtin_neon_vrshluv16qi ((int8x16_t) __a, __b);
3557 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3558 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
3560 return (uint16x8_t)__builtin_neon_vrshluv8hi ((int16x8_t) __a, __b);
3563 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3564 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
3566 return (uint32x4_t)__builtin_neon_vrshluv4si ((int32x4_t) __a, __b);
3569 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3570 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
3572 return (uint64x2_t)__builtin_neon_vrshluv2di ((int64x2_t) __a, __b);
3575 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3576 vqshl_s8 (int8x8_t __a, int8x8_t __b)
3578 return (int8x8_t)__builtin_neon_vqshlsv8qi (__a, __b);
3581 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3582 vqshl_s16 (int16x4_t __a, int16x4_t __b)
3584 return (int16x4_t)__builtin_neon_vqshlsv4hi (__a, __b);
3587 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3588 vqshl_s32 (int32x2_t __a, int32x2_t __b)
3590 return (int32x2_t)__builtin_neon_vqshlsv2si (__a, __b);
3593 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3594 vqshl_s64 (int64x1_t __a, int64x1_t __b)
3596 return (int64x1_t)__builtin_neon_vqshlsdi (__a, __b);
3599 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3600 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
3602 return (uint8x8_t)__builtin_neon_vqshluv8qi ((int8x8_t) __a, __b);
3605 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3606 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
3608 return (uint16x4_t)__builtin_neon_vqshluv4hi ((int16x4_t) __a, __b);
3611 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3612 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
3614 return (uint32x2_t)__builtin_neon_vqshluv2si ((int32x2_t) __a, __b);
3617 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3618 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
3620 return (uint64x1_t)__builtin_neon_vqshludi ((int64x1_t) __a, __b);
3623 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3624 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
3626 return (int8x16_t)__builtin_neon_vqshlsv16qi (__a, __b);
3629 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3630 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
3632 return (int16x8_t)__builtin_neon_vqshlsv8hi (__a, __b);
3635 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3636 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
3638 return (int32x4_t)__builtin_neon_vqshlsv4si (__a, __b);
3641 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3642 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
3644 return (int64x2_t)__builtin_neon_vqshlsv2di (__a, __b);
3647 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3648 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
3650 return (uint8x16_t)__builtin_neon_vqshluv16qi ((int8x16_t) __a, __b);
3653 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3654 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
3656 return (uint16x8_t)__builtin_neon_vqshluv8hi ((int16x8_t) __a, __b);
3659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3660 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
3662 return (uint32x4_t)__builtin_neon_vqshluv4si ((int32x4_t) __a, __b);
3665 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3666 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
3668 return (uint64x2_t)__builtin_neon_vqshluv2di ((int64x2_t) __a, __b);
3671 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3672 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
3674 return (int8x8_t)__builtin_neon_vqrshlsv8qi (__a, __b);
3677 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3678 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
3680 return (int16x4_t)__builtin_neon_vqrshlsv4hi (__a, __b);
3683 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3684 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
3686 return (int32x2_t)__builtin_neon_vqrshlsv2si (__a, __b);
3689 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3690 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
3692 return (int64x1_t)__builtin_neon_vqrshlsdi (__a, __b);
3695 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3696 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
3698 return (uint8x8_t)__builtin_neon_vqrshluv8qi ((int8x8_t) __a, __b);
3701 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3702 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
3704 return (uint16x4_t)__builtin_neon_vqrshluv4hi ((int16x4_t) __a, __b);
3707 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3708 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
3710 return (uint32x2_t)__builtin_neon_vqrshluv2si ((int32x2_t) __a, __b);
3713 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3714 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
3716 return (uint64x1_t)__builtin_neon_vqrshludi ((int64x1_t) __a, __b);
3719 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3720 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
3722 return (int8x16_t)__builtin_neon_vqrshlsv16qi (__a, __b);
3725 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3726 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
3728 return (int16x8_t)__builtin_neon_vqrshlsv8hi (__a, __b);
3731 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3732 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
3734 return (int32x4_t)__builtin_neon_vqrshlsv4si (__a, __b);
3737 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3738 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
3740 return (int64x2_t)__builtin_neon_vqrshlsv2di (__a, __b);
3743 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3744 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
3746 return (uint8x16_t)__builtin_neon_vqrshluv16qi ((int8x16_t) __a, __b);
3749 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3750 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
3752 return (uint16x8_t)__builtin_neon_vqrshluv8hi ((int16x8_t) __a, __b);
3755 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3756 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
3758 return (uint32x4_t)__builtin_neon_vqrshluv4si ((int32x4_t) __a, __b);
3761 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3762 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
3764 return (uint64x2_t)__builtin_neon_vqrshluv2di ((int64x2_t) __a, __b);
3767 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3768 vshr_n_s8 (int8x8_t __a, const int __b)
3770 return (int8x8_t)__builtin_neon_vshrs_nv8qi (__a, __b);
3773 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3774 vshr_n_s16 (int16x4_t __a, const int __b)
3776 return (int16x4_t)__builtin_neon_vshrs_nv4hi (__a, __b);
3779 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3780 vshr_n_s32 (int32x2_t __a, const int __b)
3782 return (int32x2_t)__builtin_neon_vshrs_nv2si (__a, __b);
3785 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3786 vshr_n_s64 (int64x1_t __a, const int __b)
3788 return (int64x1_t)__builtin_neon_vshrs_ndi (__a, __b);
3791 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3792 vshr_n_u8 (uint8x8_t __a, const int __b)
3794 return (uint8x8_t)__builtin_neon_vshru_nv8qi ((int8x8_t) __a, __b);
3797 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3798 vshr_n_u16 (uint16x4_t __a, const int __b)
3800 return (uint16x4_t)__builtin_neon_vshru_nv4hi ((int16x4_t) __a, __b);
3803 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3804 vshr_n_u32 (uint32x2_t __a, const int __b)
3806 return (uint32x2_t)__builtin_neon_vshru_nv2si ((int32x2_t) __a, __b);
3809 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3810 vshr_n_u64 (uint64x1_t __a, const int __b)
3812 return (uint64x1_t)__builtin_neon_vshru_ndi ((int64x1_t) __a, __b);
3815 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3816 vshrq_n_s8 (int8x16_t __a, const int __b)
3818 return (int8x16_t)__builtin_neon_vshrs_nv16qi (__a, __b);
3821 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3822 vshrq_n_s16 (int16x8_t __a, const int __b)
3824 return (int16x8_t)__builtin_neon_vshrs_nv8hi (__a, __b);
3827 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3828 vshrq_n_s32 (int32x4_t __a, const int __b)
3830 return (int32x4_t)__builtin_neon_vshrs_nv4si (__a, __b);
3833 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3834 vshrq_n_s64 (int64x2_t __a, const int __b)
3836 return (int64x2_t)__builtin_neon_vshrs_nv2di (__a, __b);
3839 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3840 vshrq_n_u8 (uint8x16_t __a, const int __b)
3842 return (uint8x16_t)__builtin_neon_vshru_nv16qi ((int8x16_t) __a, __b);
3845 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3846 vshrq_n_u16 (uint16x8_t __a, const int __b)
3848 return (uint16x8_t)__builtin_neon_vshru_nv8hi ((int16x8_t) __a, __b);
3851 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3852 vshrq_n_u32 (uint32x4_t __a, const int __b)
3854 return (uint32x4_t)__builtin_neon_vshru_nv4si ((int32x4_t) __a, __b);
3857 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3858 vshrq_n_u64 (uint64x2_t __a, const int __b)
3860 return (uint64x2_t)__builtin_neon_vshru_nv2di ((int64x2_t) __a, __b);
3863 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3864 vrshr_n_s8 (int8x8_t __a, const int __b)
3866 return (int8x8_t)__builtin_neon_vrshrs_nv8qi (__a, __b);
3869 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3870 vrshr_n_s16 (int16x4_t __a, const int __b)
3872 return (int16x4_t)__builtin_neon_vrshrs_nv4hi (__a, __b);
3875 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3876 vrshr_n_s32 (int32x2_t __a, const int __b)
3878 return (int32x2_t)__builtin_neon_vrshrs_nv2si (__a, __b);
3881 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3882 vrshr_n_s64 (int64x1_t __a, const int __b)
3884 return (int64x1_t)__builtin_neon_vrshrs_ndi (__a, __b);
3887 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3888 vrshr_n_u8 (uint8x8_t __a, const int __b)
3890 return (uint8x8_t)__builtin_neon_vrshru_nv8qi ((int8x8_t) __a, __b);
3893 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3894 vrshr_n_u16 (uint16x4_t __a, const int __b)
3896 return (uint16x4_t)__builtin_neon_vrshru_nv4hi ((int16x4_t) __a, __b);
3899 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3900 vrshr_n_u32 (uint32x2_t __a, const int __b)
3902 return (uint32x2_t)__builtin_neon_vrshru_nv2si ((int32x2_t) __a, __b);
3905 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3906 vrshr_n_u64 (uint64x1_t __a, const int __b)
3908 return (uint64x1_t)__builtin_neon_vrshru_ndi ((int64x1_t) __a, __b);
3911 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3912 vrshrq_n_s8 (int8x16_t __a, const int __b)
3914 return (int8x16_t)__builtin_neon_vrshrs_nv16qi (__a, __b);
3917 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3918 vrshrq_n_s16 (int16x8_t __a, const int __b)
3920 return (int16x8_t)__builtin_neon_vrshrs_nv8hi (__a, __b);
3923 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3924 vrshrq_n_s32 (int32x4_t __a, const int __b)
3926 return (int32x4_t)__builtin_neon_vrshrs_nv4si (__a, __b);
3929 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3930 vrshrq_n_s64 (int64x2_t __a, const int __b)
3932 return (int64x2_t)__builtin_neon_vrshrs_nv2di (__a, __b);
3935 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3936 vrshrq_n_u8 (uint8x16_t __a, const int __b)
3938 return (uint8x16_t)__builtin_neon_vrshru_nv16qi ((int8x16_t) __a, __b);
3941 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3942 vrshrq_n_u16 (uint16x8_t __a, const int __b)
3944 return (uint16x8_t)__builtin_neon_vrshru_nv8hi ((int16x8_t) __a, __b);
3947 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3948 vrshrq_n_u32 (uint32x4_t __a, const int __b)
3950 return (uint32x4_t)__builtin_neon_vrshru_nv4si ((int32x4_t) __a, __b);
3953 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3954 vrshrq_n_u64 (uint64x2_t __a, const int __b)
3956 return (uint64x2_t)__builtin_neon_vrshru_nv2di ((int64x2_t) __a, __b);
3959 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3960 vshrn_n_s16 (int16x8_t __a, const int __b)
3962 return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b);
3965 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3966 vshrn_n_s32 (int32x4_t __a, const int __b)
3968 return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b);
3971 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3972 vshrn_n_s64 (int64x2_t __a, const int __b)
3974 return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b);
3977 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3978 vshrn_n_u16 (uint16x8_t __a, const int __b)
3980 return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b);
3983 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3984 vshrn_n_u32 (uint32x4_t __a, const int __b)
3986 return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b);
3989 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3990 vshrn_n_u64 (uint64x2_t __a, const int __b)
3992 return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b);
3995 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3996 vrshrn_n_s16 (int16x8_t __a, const int __b)
3998 return (int8x8_t)__builtin_neon_vrshrn_nv8hi (__a, __b);
4001 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4002 vrshrn_n_s32 (int32x4_t __a, const int __b)
4004 return (int16x4_t)__builtin_neon_vrshrn_nv4si (__a, __b);
4007 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4008 vrshrn_n_s64 (int64x2_t __a, const int __b)
4010 return (int32x2_t)__builtin_neon_vrshrn_nv2di (__a, __b);
4013 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4014 vrshrn_n_u16 (uint16x8_t __a, const int __b)
4016 return (uint8x8_t)__builtin_neon_vrshrn_nv8hi ((int16x8_t) __a, __b);
4019 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4020 vrshrn_n_u32 (uint32x4_t __a, const int __b)
4022 return (uint16x4_t)__builtin_neon_vrshrn_nv4si ((int32x4_t) __a, __b);
4025 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4026 vrshrn_n_u64 (uint64x2_t __a, const int __b)
4028 return (uint32x2_t)__builtin_neon_vrshrn_nv2di ((int64x2_t) __a, __b);
4031 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4032 vqshrn_n_s16 (int16x8_t __a, const int __b)
4034 return (int8x8_t)__builtin_neon_vqshrns_nv8hi (__a, __b);
4037 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4038 vqshrn_n_s32 (int32x4_t __a, const int __b)
4040 return (int16x4_t)__builtin_neon_vqshrns_nv4si (__a, __b);
4043 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4044 vqshrn_n_s64 (int64x2_t __a, const int __b)
4046 return (int32x2_t)__builtin_neon_vqshrns_nv2di (__a, __b);
4049 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4050 vqshrn_n_u16 (uint16x8_t __a, const int __b)
4052 return (uint8x8_t)__builtin_neon_vqshrnu_nv8hi ((int16x8_t) __a, __b);
4055 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4056 vqshrn_n_u32 (uint32x4_t __a, const int __b)
4058 return (uint16x4_t)__builtin_neon_vqshrnu_nv4si ((int32x4_t) __a, __b);
4061 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4062 vqshrn_n_u64 (uint64x2_t __a, const int __b)
4064 return (uint32x2_t)__builtin_neon_vqshrnu_nv2di ((int64x2_t) __a, __b);
4067 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4068 vqrshrn_n_s16 (int16x8_t __a, const int __b)
4070 return (int8x8_t)__builtin_neon_vqrshrns_nv8hi (__a, __b);
4073 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4074 vqrshrn_n_s32 (int32x4_t __a, const int __b)
4076 return (int16x4_t)__builtin_neon_vqrshrns_nv4si (__a, __b);
4079 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4080 vqrshrn_n_s64 (int64x2_t __a, const int __b)
4082 return (int32x2_t)__builtin_neon_vqrshrns_nv2di (__a, __b);
4085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4086 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
4088 return (uint8x8_t)__builtin_neon_vqrshrnu_nv8hi ((int16x8_t) __a, __b);
4091 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4092 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
4094 return (uint16x4_t)__builtin_neon_vqrshrnu_nv4si ((int32x4_t) __a, __b);
4097 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4098 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
4100 return (uint32x2_t)__builtin_neon_vqrshrnu_nv2di ((int64x2_t) __a, __b);
4103 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4104 vqshrun_n_s16 (int16x8_t __a, const int __b)
4106 return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b);
4109 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4110 vqshrun_n_s32 (int32x4_t __a, const int __b)
4112 return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b);
4115 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4116 vqshrun_n_s64 (int64x2_t __a, const int __b)
4118 return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b);
4121 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4122 vqrshrun_n_s16 (int16x8_t __a, const int __b)
4124 return (uint8x8_t)__builtin_neon_vqrshrun_nv8hi (__a, __b);
4127 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4128 vqrshrun_n_s32 (int32x4_t __a, const int __b)
4130 return (uint16x4_t)__builtin_neon_vqrshrun_nv4si (__a, __b);
4133 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4134 vqrshrun_n_s64 (int64x2_t __a, const int __b)
4136 return (uint32x2_t)__builtin_neon_vqrshrun_nv2di (__a, __b);
4139 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4140 vshl_n_s8 (int8x8_t __a, const int __b)
4142 return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b);
4145 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4146 vshl_n_s16 (int16x4_t __a, const int __b)
4148 return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b);
4151 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4152 vshl_n_s32 (int32x2_t __a, const int __b)
4154 return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b);
4157 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4158 vshl_n_s64 (int64x1_t __a, const int __b)
4160 return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b);
4163 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4164 vshl_n_u8 (uint8x8_t __a, const int __b)
4166 return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b);
4169 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4170 vshl_n_u16 (uint16x4_t __a, const int __b)
4172 return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b);
4175 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4176 vshl_n_u32 (uint32x2_t __a, const int __b)
4178 return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b);
4181 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4182 vshl_n_u64 (uint64x1_t __a, const int __b)
4184 return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b);
4187 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4188 vshlq_n_s8 (int8x16_t __a, const int __b)
4190 return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b);
4193 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4194 vshlq_n_s16 (int16x8_t __a, const int __b)
4196 return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b);
4199 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4200 vshlq_n_s32 (int32x4_t __a, const int __b)
4202 return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b);
4205 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4206 vshlq_n_s64 (int64x2_t __a, const int __b)
4208 return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b);
4211 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4212 vshlq_n_u8 (uint8x16_t __a, const int __b)
4214 return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b);
4217 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4218 vshlq_n_u16 (uint16x8_t __a, const int __b)
4220 return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b);
4223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4224 vshlq_n_u32 (uint32x4_t __a, const int __b)
4226 return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b);
4229 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4230 vshlq_n_u64 (uint64x2_t __a, const int __b)
4232 return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b);
4235 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4236 vqshl_n_s8 (int8x8_t __a, const int __b)
4238 return (int8x8_t)__builtin_neon_vqshl_s_nv8qi (__a, __b);
4241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4242 vqshl_n_s16 (int16x4_t __a, const int __b)
4244 return (int16x4_t)__builtin_neon_vqshl_s_nv4hi (__a, __b);
4247 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4248 vqshl_n_s32 (int32x2_t __a, const int __b)
4250 return (int32x2_t)__builtin_neon_vqshl_s_nv2si (__a, __b);
4253 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4254 vqshl_n_s64 (int64x1_t __a, const int __b)
4256 return (int64x1_t)__builtin_neon_vqshl_s_ndi (__a, __b);
4259 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4260 vqshl_n_u8 (uint8x8_t __a, const int __b)
4262 return (uint8x8_t)__builtin_neon_vqshl_u_nv8qi ((int8x8_t) __a, __b);
4265 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4266 vqshl_n_u16 (uint16x4_t __a, const int __b)
4268 return (uint16x4_t)__builtin_neon_vqshl_u_nv4hi ((int16x4_t) __a, __b);
4271 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4272 vqshl_n_u32 (uint32x2_t __a, const int __b)
4274 return (uint32x2_t)__builtin_neon_vqshl_u_nv2si ((int32x2_t) __a, __b);
4277 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4278 vqshl_n_u64 (uint64x1_t __a, const int __b)
4280 return (uint64x1_t)__builtin_neon_vqshl_u_ndi ((int64x1_t) __a, __b);
4283 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4284 vqshlq_n_s8 (int8x16_t __a, const int __b)
4286 return (int8x16_t)__builtin_neon_vqshl_s_nv16qi (__a, __b);
4289 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4290 vqshlq_n_s16 (int16x8_t __a, const int __b)
4292 return (int16x8_t)__builtin_neon_vqshl_s_nv8hi (__a, __b);
4295 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4296 vqshlq_n_s32 (int32x4_t __a, const int __b)
4298 return (int32x4_t)__builtin_neon_vqshl_s_nv4si (__a, __b);
4301 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4302 vqshlq_n_s64 (int64x2_t __a, const int __b)
4304 return (int64x2_t)__builtin_neon_vqshl_s_nv2di (__a, __b);
4307 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4308 vqshlq_n_u8 (uint8x16_t __a, const int __b)
4310 return (uint8x16_t)__builtin_neon_vqshl_u_nv16qi ((int8x16_t) __a, __b);
4313 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4314 vqshlq_n_u16 (uint16x8_t __a, const int __b)
4316 return (uint16x8_t)__builtin_neon_vqshl_u_nv8hi ((int16x8_t) __a, __b);
4319 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4320 vqshlq_n_u32 (uint32x4_t __a, const int __b)
4322 return (uint32x4_t)__builtin_neon_vqshl_u_nv4si ((int32x4_t) __a, __b);
4325 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4326 vqshlq_n_u64 (uint64x2_t __a, const int __b)
4328 return (uint64x2_t)__builtin_neon_vqshl_u_nv2di ((int64x2_t) __a, __b);
4331 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4332 vqshlu_n_s8 (int8x8_t __a, const int __b)
4334 return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b);
4337 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4338 vqshlu_n_s16 (int16x4_t __a, const int __b)
4340 return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b);
4343 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4344 vqshlu_n_s32 (int32x2_t __a, const int __b)
4346 return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b);
4349 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4350 vqshlu_n_s64 (int64x1_t __a, const int __b)
4352 return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b);
4355 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4356 vqshluq_n_s8 (int8x16_t __a, const int __b)
4358 return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b);
4361 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4362 vqshluq_n_s16 (int16x8_t __a, const int __b)
4364 return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b);
4367 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4368 vqshluq_n_s32 (int32x4_t __a, const int __b)
4370 return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b);
4373 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4374 vqshluq_n_s64 (int64x2_t __a, const int __b)
4376 return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b);
4379 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4380 vshll_n_s8 (int8x8_t __a, const int __b)
4382 return (int16x8_t)__builtin_neon_vshlls_nv8qi (__a, __b);
4385 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4386 vshll_n_s16 (int16x4_t __a, const int __b)
4388 return (int32x4_t)__builtin_neon_vshlls_nv4hi (__a, __b);
4391 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4392 vshll_n_s32 (int32x2_t __a, const int __b)
4394 return (int64x2_t)__builtin_neon_vshlls_nv2si (__a, __b);
4397 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4398 vshll_n_u8 (uint8x8_t __a, const int __b)
4400 return (uint16x8_t)__builtin_neon_vshllu_nv8qi ((int8x8_t) __a, __b);
4403 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4404 vshll_n_u16 (uint16x4_t __a, const int __b)
4406 return (uint32x4_t)__builtin_neon_vshllu_nv4hi ((int16x4_t) __a, __b);
4409 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4410 vshll_n_u32 (uint32x2_t __a, const int __b)
4412 return (uint64x2_t)__builtin_neon_vshllu_nv2si ((int32x2_t) __a, __b);
4415 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4416 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
4418 return (int8x8_t)__builtin_neon_vsras_nv8qi (__a, __b, __c);
4421 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4422 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
4424 return (int16x4_t)__builtin_neon_vsras_nv4hi (__a, __b, __c);
4427 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4428 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
4430 return (int32x2_t)__builtin_neon_vsras_nv2si (__a, __b, __c);
4433 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4434 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
4436 return (int64x1_t)__builtin_neon_vsras_ndi (__a, __b, __c);
4439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4440 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
4442 return (uint8x8_t)__builtin_neon_vsrau_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4445 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4446 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
4448 return (uint16x4_t)__builtin_neon_vsrau_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4451 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4452 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
4454 return (uint32x2_t)__builtin_neon_vsrau_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
4457 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4458 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
4460 return (uint64x1_t)__builtin_neon_vsrau_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
4463 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4464 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
4466 return (int8x16_t)__builtin_neon_vsras_nv16qi (__a, __b, __c);
4469 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4470 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
4472 return (int16x8_t)__builtin_neon_vsras_nv8hi (__a, __b, __c);
4475 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4476 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
4478 return (int32x4_t)__builtin_neon_vsras_nv4si (__a, __b, __c);
4481 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4482 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
4484 return (int64x2_t)__builtin_neon_vsras_nv2di (__a, __b, __c);
4487 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4488 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
4490 return (uint8x16_t)__builtin_neon_vsrau_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4493 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4494 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
4496 return (uint16x8_t)__builtin_neon_vsrau_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4499 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4500 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
4502 return (uint32x4_t)__builtin_neon_vsrau_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
4505 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4506 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
4508 return (uint64x2_t)__builtin_neon_vsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4511 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4512 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
4514 return (int8x8_t)__builtin_neon_vrsras_nv8qi (__a, __b, __c);
4517 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4518 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
4520 return (int16x4_t)__builtin_neon_vrsras_nv4hi (__a, __b, __c);
4523 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4524 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
4526 return (int32x2_t)__builtin_neon_vrsras_nv2si (__a, __b, __c);
4529 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4530 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
4532 return (int64x1_t)__builtin_neon_vrsras_ndi (__a, __b, __c);
4535 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4536 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
4538 return (uint8x8_t)__builtin_neon_vrsrau_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4541 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4542 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
4544 return (uint16x4_t)__builtin_neon_vrsrau_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4547 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4548 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
4550 return (uint32x2_t)__builtin_neon_vrsrau_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
4553 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4554 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
4556 return (uint64x1_t)__builtin_neon_vrsrau_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
4559 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4560 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
4562 return (int8x16_t)__builtin_neon_vrsras_nv16qi (__a, __b, __c);
4565 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4566 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
4568 return (int16x8_t)__builtin_neon_vrsras_nv8hi (__a, __b, __c);
4571 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4572 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
4574 return (int32x4_t)__builtin_neon_vrsras_nv4si (__a, __b, __c);
4577 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4578 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
4580 return (int64x2_t)__builtin_neon_vrsras_nv2di (__a, __b, __c);
4583 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4584 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
4586 return (uint8x16_t)__builtin_neon_vrsrau_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4589 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4590 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
4592 return (uint16x8_t)__builtin_neon_vrsrau_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4595 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4596 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
4598 return (uint32x4_t)__builtin_neon_vrsrau_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
4601 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4602 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
4604 return (uint64x2_t)__builtin_neon_vrsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4607 #pragma GCC push_options
4608 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
4609 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
4610 vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
4612 return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c);
4615 #pragma GCC pop_options
4616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4617 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
4619 return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c);
4622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4623 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
4625 return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c);
4628 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4629 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
4631 return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c);
4634 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4635 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
4637 return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c);
4640 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4641 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
4643 return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4647 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
4649 return (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4653 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
4655 return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
4658 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4659 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
4661 return (uint64x1_t)__builtin_neon_vsri_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
4664 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4665 vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
4667 return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4670 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4671 vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
4673 return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4676 #pragma GCC push_options
4677 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
4678 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
4679 vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
4681 return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4684 #pragma GCC pop_options
4685 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4686 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
4688 return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c);
4691 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4692 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
4694 return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c);
4697 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4698 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
4700 return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c);
4703 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4704 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
4706 return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c);
4709 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4710 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
4712 return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4715 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4716 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
4718 return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4721 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4722 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
4724 return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
4727 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4728 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
4730 return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4733 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4734 vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
4736 return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4739 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4740 vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
4742 return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4745 #pragma GCC push_options
4746 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
4747 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
4748 vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
4750 return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c);
4753 #pragma GCC pop_options
4754 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4755 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
4757 return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c);
4760 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4761 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
4763 return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c);
4766 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4767 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
4769 return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c);
4772 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4773 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
4775 return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c);
4778 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4779 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
4781 return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4784 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4785 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
4787 return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4790 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4791 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
4793 return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
4796 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4797 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
4799 return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
4802 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4803 vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
4805 return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
4808 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4809 vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
4811 return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
4814 #pragma GCC push_options
4815 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
4816 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
4817 vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
4819 return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4822 #pragma GCC pop_options
4823 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4824 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
4826 return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c);
4829 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4830 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
4832 return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c);
4835 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4836 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
4838 return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c);
4841 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4842 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
4844 return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c);
4847 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4848 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
4850 return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4854 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
4856 return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4859 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4860 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
4862 return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
4865 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4866 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
4868 return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
4871 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4872 vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
4874 return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
4877 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4878 vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
4880 return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
4883 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4884 vabs_s8 (int8x8_t __a)
4886 return (int8x8_t)__builtin_neon_vabsv8qi (__a);
4889 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4890 vabs_s16 (int16x4_t __a)
4892 return (int16x4_t)__builtin_neon_vabsv4hi (__a);
4895 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4896 vabs_s32 (int32x2_t __a)
4898 return (int32x2_t)__builtin_neon_vabsv2si (__a);
4901 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4902 vabs_f32 (float32x2_t __a)
4904 return (float32x2_t)__builtin_neon_vabsv2sf (__a);
4907 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4908 vabsq_s8 (int8x16_t __a)
4910 return (int8x16_t)__builtin_neon_vabsv16qi (__a);
4913 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4914 vabsq_s16 (int16x8_t __a)
4916 return (int16x8_t)__builtin_neon_vabsv8hi (__a);
4919 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4920 vabsq_s32 (int32x4_t __a)
4922 return (int32x4_t)__builtin_neon_vabsv4si (__a);
4925 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4926 vabsq_f32 (float32x4_t __a)
4928 return (float32x4_t)__builtin_neon_vabsv4sf (__a);
4931 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4932 vqabs_s8 (int8x8_t __a)
4934 return (int8x8_t)__builtin_neon_vqabsv8qi (__a);
4937 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4938 vqabs_s16 (int16x4_t __a)
4940 return (int16x4_t)__builtin_neon_vqabsv4hi (__a);
4943 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4944 vqabs_s32 (int32x2_t __a)
4946 return (int32x2_t)__builtin_neon_vqabsv2si (__a);
4949 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4950 vqabsq_s8 (int8x16_t __a)
4952 return (int8x16_t)__builtin_neon_vqabsv16qi (__a);
4955 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4956 vqabsq_s16 (int16x8_t __a)
4958 return (int16x8_t)__builtin_neon_vqabsv8hi (__a);
4961 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4962 vqabsq_s32 (int32x4_t __a)
4964 return (int32x4_t)__builtin_neon_vqabsv4si (__a);
4967 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4968 vneg_s8 (int8x8_t __a)
4970 return (int8x8_t)__builtin_neon_vnegv8qi (__a);
4973 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4974 vneg_s16 (int16x4_t __a)
4976 return (int16x4_t)__builtin_neon_vnegv4hi (__a);
4979 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4980 vneg_s32 (int32x2_t __a)
4982 return (int32x2_t)__builtin_neon_vnegv2si (__a);
4985 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4986 vneg_f32 (float32x2_t __a)
4988 return (float32x2_t)__builtin_neon_vnegv2sf (__a);
4991 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4992 vnegq_s8 (int8x16_t __a)
4994 return (int8x16_t)__builtin_neon_vnegv16qi (__a);
4997 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4998 vnegq_s16 (int16x8_t __a)
5000 return (int16x8_t)__builtin_neon_vnegv8hi (__a);
5003 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5004 vnegq_s32 (int32x4_t __a)
5006 return (int32x4_t)__builtin_neon_vnegv4si (__a);
5009 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5010 vnegq_f32 (float32x4_t __a)
5012 return (float32x4_t)__builtin_neon_vnegv4sf (__a);
5015 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5016 vqneg_s8 (int8x8_t __a)
5018 return (int8x8_t)__builtin_neon_vqnegv8qi (__a);
5021 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5022 vqneg_s16 (int16x4_t __a)
5024 return (int16x4_t)__builtin_neon_vqnegv4hi (__a);
5027 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5028 vqneg_s32 (int32x2_t __a)
5030 return (int32x2_t)__builtin_neon_vqnegv2si (__a);
5033 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5034 vqnegq_s8 (int8x16_t __a)
5036 return (int8x16_t)__builtin_neon_vqnegv16qi (__a);
5039 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5040 vqnegq_s16 (int16x8_t __a)
5042 return (int16x8_t)__builtin_neon_vqnegv8hi (__a);
5045 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5046 vqnegq_s32 (int32x4_t __a)
5048 return (int32x4_t)__builtin_neon_vqnegv4si (__a);
5051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5052 vmvn_s8 (int8x8_t __a)
5054 return (int8x8_t)__builtin_neon_vmvnv8qi (__a);
5057 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5058 vmvn_s16 (int16x4_t __a)
5060 return (int16x4_t)__builtin_neon_vmvnv4hi (__a);
5063 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5064 vmvn_s32 (int32x2_t __a)
5066 return (int32x2_t)__builtin_neon_vmvnv2si (__a);
5069 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5070 vmvn_u8 (uint8x8_t __a)
5072 return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a);
5075 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5076 vmvn_u16 (uint16x4_t __a)
5078 return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a);
5081 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5082 vmvn_u32 (uint32x2_t __a)
5084 return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a);
5087 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5088 vmvn_p8 (poly8x8_t __a)
5090 return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a);
5093 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5094 vmvnq_s8 (int8x16_t __a)
5096 return (int8x16_t)__builtin_neon_vmvnv16qi (__a);
5099 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5100 vmvnq_s16 (int16x8_t __a)
5102 return (int16x8_t)__builtin_neon_vmvnv8hi (__a);
5105 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5106 vmvnq_s32 (int32x4_t __a)
5108 return (int32x4_t)__builtin_neon_vmvnv4si (__a);
5111 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5112 vmvnq_u8 (uint8x16_t __a)
5114 return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a);
5117 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5118 vmvnq_u16 (uint16x8_t __a)
5120 return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a);
5123 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5124 vmvnq_u32 (uint32x4_t __a)
5126 return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a);
5129 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5130 vmvnq_p8 (poly8x16_t __a)
5132 return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a);
5135 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5136 vcls_s8 (int8x8_t __a)
5138 return (int8x8_t)__builtin_neon_vclsv8qi (__a);
5141 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5142 vcls_s16 (int16x4_t __a)
5144 return (int16x4_t)__builtin_neon_vclsv4hi (__a);
5147 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5148 vcls_s32 (int32x2_t __a)
5150 return (int32x2_t)__builtin_neon_vclsv2si (__a);
5153 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5154 vclsq_s8 (int8x16_t __a)
5156 return (int8x16_t)__builtin_neon_vclsv16qi (__a);
5159 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5160 vclsq_s16 (int16x8_t __a)
5162 return (int16x8_t)__builtin_neon_vclsv8hi (__a);
5165 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5166 vclsq_s32 (int32x4_t __a)
5168 return (int32x4_t)__builtin_neon_vclsv4si (__a);
5171 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5172 vclz_s8 (int8x8_t __a)
5174 return (int8x8_t)__builtin_neon_vclzv8qi (__a);
5177 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5178 vclz_s16 (int16x4_t __a)
5180 return (int16x4_t)__builtin_neon_vclzv4hi (__a);
5183 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5184 vclz_s32 (int32x2_t __a)
5186 return (int32x2_t)__builtin_neon_vclzv2si (__a);
5189 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5190 vclz_u8 (uint8x8_t __a)
5192 return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a);
5195 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5196 vclz_u16 (uint16x4_t __a)
5198 return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a);
5201 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5202 vclz_u32 (uint32x2_t __a)
5204 return (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a);
5207 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5208 vclzq_s8 (int8x16_t __a)
5210 return (int8x16_t)__builtin_neon_vclzv16qi (__a);
5213 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5214 vclzq_s16 (int16x8_t __a)
5216 return (int16x8_t)__builtin_neon_vclzv8hi (__a);
5219 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5220 vclzq_s32 (int32x4_t __a)
5222 return (int32x4_t)__builtin_neon_vclzv4si (__a);
5225 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5226 vclzq_u8 (uint8x16_t __a)
5228 return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) __a);
5231 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5232 vclzq_u16 (uint16x8_t __a)
5234 return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a);
5237 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5238 vclzq_u32 (uint32x4_t __a)
5240 return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a);
5243 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5244 vcnt_s8 (int8x8_t __a)
5246 return (int8x8_t)__builtin_neon_vcntv8qi (__a);
5249 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5250 vcnt_u8 (uint8x8_t __a)
5252 return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a);
5255 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5256 vcnt_p8 (poly8x8_t __a)
5258 return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a);
5261 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5262 vcntq_s8 (int8x16_t __a)
5264 return (int8x16_t)__builtin_neon_vcntv16qi (__a);
5267 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5268 vcntq_u8 (uint8x16_t __a)
5270 return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a);
5273 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5274 vcntq_p8 (poly8x16_t __a)
5276 return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a);
5279 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5280 vrecpe_f32 (float32x2_t __a)
5282 return (float32x2_t)__builtin_neon_vrecpev2sf (__a);
5285 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5286 vrecpe_u32 (uint32x2_t __a)
5288 return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a);
5291 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5292 vrecpeq_f32 (float32x4_t __a)
5294 return (float32x4_t)__builtin_neon_vrecpev4sf (__a);
5297 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5298 vrecpeq_u32 (uint32x4_t __a)
5300 return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a);
5303 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5304 vrsqrte_f32 (float32x2_t __a)
5306 return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a);
5309 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5310 vrsqrte_u32 (uint32x2_t __a)
5312 return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a);
5315 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5316 vrsqrteq_f32 (float32x4_t __a)
5318 return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a);
5321 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5322 vrsqrteq_u32 (uint32x4_t __a)
5324 return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a);
5327 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
5328 vget_lane_s8 (int8x8_t __a, const int __b)
5330 return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b);
5333 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5334 vget_lane_s16 (int16x4_t __a, const int __b)
5336 return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b);
5339 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5340 vget_lane_s32 (int32x2_t __a, const int __b)
5342 return (int32_t)__builtin_neon_vget_lanev2si (__a, __b);
5345 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Functions cannot accept or return __fp16 types.  Even if the function
   were marked always-inline so there were no call sites, the declaration
   would nonetheless raise an error.  Hence, we must use a macro instead.  */
/* For big-endian, GCC's vector indices are reversed within each 64
   bits compared to the architectural lane indices used by Neon
   intrinsics.

   __arm_lane (vec, idx) and __arm_laneq (vec, idx) translate a lane
   index into the index used to subscript the GCC vector VEC.  On
   big-endian, __arm_lane XORs the index with (number of lanes - 1) and
   __arm_laneq XORs it with (half the number of lanes - 1); on
   little-endian both yield the index unchanged.

   Every macro parameter is parenthesized in the expansion so that an
   index written as a compound expression (e.g. "a | b" or "c ? x : y")
   is treated as a single operand.  */
#ifdef __ARM_BIG_ENDIAN
#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof ((__v)[0]))
#define __arm_lane(__vec, __idx) ((__idx) ^ (__ARM_NUM_LANES (__vec) - 1))
#define __arm_laneq(__vec, __idx) ((__idx) ^ (__ARM_NUM_LANES (__vec) / 2 - 1))
#else
#define __arm_lane(__vec, __idx) (__idx)
#define __arm_laneq(__vec, __idx) (__idx)
#endif
/* vget_lane_f16 (v, idx): yield element IDX of the float16x4_t V as a
   float16_t.  Implemented as a statement expression: V is copied into a
   local so it is evaluated once, __builtin_arm_lane_check is called with
   the lane count 4 and the index (presumably to diagnose an out-of-range
   lane), and the vector is then subscripted through __arm_lane.  The
   index is parenthesized where it is handed to __arm_lane so that a
   compound index expression stays a single operand.  */
#define vget_lane_f16(__v, __idx)				\
  __extension__							\
  ({								\
    float16x4_t __vec = (__v);					\
    __builtin_arm_lane_check (4, __idx);			\
    float16_t __res = __vec[__arm_lane (__vec, (__idx))];	\
    __res;							\
  })
5370 #endif
5372 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5373 vget_lane_f32 (float32x2_t __a, const int __b)
5375 return (float32_t)__builtin_neon_vget_lanev2sf (__a, __b);
5378 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
5379 vget_lane_u8 (uint8x8_t __a, const int __b)
5381 return (uint8_t)__builtin_neon_vget_laneuv8qi ((int8x8_t) __a, __b);
5384 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5385 vget_lane_u16 (uint16x4_t __a, const int __b)
5387 return (uint16_t)__builtin_neon_vget_laneuv4hi ((int16x4_t) __a, __b);
5390 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5391 vget_lane_u32 (uint32x2_t __a, const int __b)
5393 return (uint32_t)__builtin_neon_vget_laneuv2si ((int32x2_t) __a, __b);
5396 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
5397 vget_lane_p8 (poly8x8_t __a, const int __b)
5399 return (poly8_t)__builtin_neon_vget_laneuv8qi ((int8x8_t) __a, __b);
5402 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
5403 vget_lane_p16 (poly16x4_t __a, const int __b)
5405 return (poly16_t)__builtin_neon_vget_laneuv4hi ((int16x4_t) __a, __b);
5408 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5409 vget_lane_s64 (int64x1_t __a, const int __b)
5411 return (int64_t)__builtin_neon_vget_lanedi (__a, __b);
5414 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5415 vget_lane_u64 (uint64x1_t __a, const int __b)
5417 return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b);
5420 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
5421 vgetq_lane_s8 (int8x16_t __a, const int __b)
5423 return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b);
5426 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5427 vgetq_lane_s16 (int16x8_t __a, const int __b)
5429 return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b);
5432 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5433 vgetq_lane_s32 (int32x4_t __a, const int __b)
5435 return (int32_t)__builtin_neon_vget_lanev4si (__a, __b);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vgetq_lane_f16 (v, idx): yield element IDX of the float16x8_t V as a
   float16_t.  A macro (statement expression) rather than a function: V is
   copied into a local so it is evaluated once, __builtin_arm_lane_check
   is called with the lane count 8 and the index, and the vector is then
   subscripted through __arm_laneq.  The index is parenthesized where it
   is handed to __arm_laneq so that a compound index expression stays a
   single operand.  */
#define vgetq_lane_f16(__v, __idx)				\
  __extension__							\
  ({								\
    float16x8_t __vec = (__v);					\
    __builtin_arm_lane_check (8, __idx);			\
    float16_t __res = __vec[__arm_laneq (__vec, (__idx))];	\
    __res;							\
  })
#endif
5449 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5450 vgetq_lane_f32 (float32x4_t __a, const int __b)
5452 return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b);
5455 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
5456 vgetq_lane_u8 (uint8x16_t __a, const int __b)
5458 return (uint8_t)__builtin_neon_vget_laneuv16qi ((int8x16_t) __a, __b);
5461 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5462 vgetq_lane_u16 (uint16x8_t __a, const int __b)
5464 return (uint16_t)__builtin_neon_vget_laneuv8hi ((int16x8_t) __a, __b);
5467 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5468 vgetq_lane_u32 (uint32x4_t __a, const int __b)
5470 return (uint32_t)__builtin_neon_vget_laneuv4si ((int32x4_t) __a, __b);
5473 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
5474 vgetq_lane_p8 (poly8x16_t __a, const int __b)
5476 return (poly8_t)__builtin_neon_vget_laneuv16qi ((int8x16_t) __a, __b);
5479 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
5480 vgetq_lane_p16 (poly16x8_t __a, const int __b)
5482 return (poly16_t)__builtin_neon_vget_laneuv8hi ((int16x8_t) __a, __b);
5485 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5486 vgetq_lane_s64 (int64x2_t __a, const int __b)
5488 return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
5491 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5492 vgetq_lane_u64 (uint64x2_t __a, const int __b)
5494 return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
5497 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5498 vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c)
5500 return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c);
5503 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5504 vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
5506 return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c);
5509 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5510 vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
5512 return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vset_lane_f16 (e, v, idx): yield a copy of the float16x4_t V with
   element IDX replaced by the float16_t E.  A macro (statement
   expression) rather than a function: E and V are copied into locals so
   each is evaluated once, __builtin_arm_lane_check is called with the
   lane count 4 and the index, and the local vector is then written
   through __arm_lane and yielded.  The index is parenthesized where it is
   handed to __arm_lane so that a compound index expression stays a single
   operand.  */
#define vset_lane_f16(__e, __v, __idx)			\
  __extension__						\
  ({							\
    float16_t __elem = (__e);				\
    float16x4_t __vec = (__v);				\
    __builtin_arm_lane_check (4, __idx);		\
    __vec[__arm_lane (__vec, (__idx))] = __elem;	\
    __vec;						\
  })
#endif
5527 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5528 vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
5530 return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
5533 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5534 vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c)
5536 return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c);
5539 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5540 vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c)
5542 return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c);
5545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5546 vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c)
5548 return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c);
5551 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5552 vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c)
5554 return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c);
5557 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5558 vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c)
5560 return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c);
5563 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5564 vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c)
5566 return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c);
5569 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5570 vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
5572 return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
5575 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5576 vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
5578 return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c);
5581 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5582 vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c)
5584 return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c);
5587 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5588 vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
5590 return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vsetq_lane_f16 (e, v, idx): yield a copy of the float16x8_t V with
   element IDX replaced by the float16_t E.  A macro (statement
   expression) rather than a function: E and V are copied into locals so
   each is evaluated once, __builtin_arm_lane_check is called with the
   lane count 8 and the index, and the local vector is then written
   through __arm_laneq and yielded.  The index is parenthesized where it
   is handed to __arm_laneq so that a compound index expression stays a
   single operand.  */
#define vsetq_lane_f16(__e, __v, __idx)			\
  __extension__						\
  ({							\
    float16_t __elem = (__e);				\
    float16x8_t __vec = (__v);				\
    __builtin_arm_lane_check (8, __idx);		\
    __vec[__arm_laneq (__vec, (__idx))] = __elem;	\
    __vec;						\
  })
#endif
5605 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5606 vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
5608 return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
5611 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5612 vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c)
5614 return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c);
5617 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5618 vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c)
5620 return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c);
5623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5624 vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c)
5626 return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c);
5629 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5630 vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c)
5632 return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c);
5635 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
5636 vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c)
5638 return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c);
5641 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5642 vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c)
5644 return (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c);
5647 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5648 vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
5650 return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
5653 #pragma GCC push_options
5654 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
5655 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
5656 vcreate_p64 (uint64_t __a)
5658 return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
5661 #pragma GCC pop_options
5662 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5663 vcreate_s8 (uint64_t __a)
5665 return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
5668 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5669 vcreate_s16 (uint64_t __a)
5671 return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
5674 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5675 vcreate_s32 (uint64_t __a)
5677 return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a);
5680 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5681 vcreate_s64 (uint64_t __a)
5683 return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vcreate_f16: build a float16x4_t from the 64-bit integer __a.  Unlike
   the other vcreate_* intrinsics this one calls no builtin; it is a plain
   cast of __a to the vector type.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcreate_f16 (uint64_t __a)
{
  return (float16x4_t) __a;
}
#endif
5694 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5695 vcreate_f32 (uint64_t __a)
5697 return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a);
5700 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5701 vcreate_u8 (uint64_t __a)
5703 return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
5706 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5707 vcreate_u16 (uint64_t __a)
5709 return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
5712 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5713 vcreate_u32 (uint64_t __a)
5715 return (uint32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a);
5718 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5719 vcreate_u64 (uint64_t __a)
5721 return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
5724 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5725 vcreate_p8 (uint64_t __a)
5727 return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
5730 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5731 vcreate_p16 (uint64_t __a)
5733 return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
5736 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5737 vdup_n_s8 (int8_t __a)
5739 return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5742 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5743 vdup_n_s16 (int16_t __a)
5745 return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5748 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5749 vdup_n_s32 (int32_t __a)
5751 return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
5754 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5755 vdup_n_f32 (float32_t __a)
5757 return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
5760 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5761 vdup_n_u8 (uint8_t __a)
5763 return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5766 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5767 vdup_n_u16 (uint16_t __a)
5769 return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5772 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5773 vdup_n_u32 (uint32_t __a)
5775 return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
5778 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5779 vdup_n_p8 (poly8_t __a)
5781 return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5784 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5785 vdup_n_p16 (poly16_t __a)
5787 return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5790 #pragma GCC push_options
5791 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
5792 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
5793 vdup_n_p64 (poly64_t __a)
5795 return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
5798 #pragma GCC pop_options
5799 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5800 vdup_n_s64 (int64_t __a)
5802 return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
5805 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5806 vdup_n_u64 (uint64_t __a)
5808 return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
5811 #pragma GCC push_options
5812 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
5813 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
5814 vdupq_n_p64 (poly64_t __a)
5816 return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
5819 #pragma GCC pop_options
5820 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5821 vdupq_n_s8 (int8_t __a)
5823 return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
5826 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5827 vdupq_n_s16 (int16_t __a)
5829 return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
5832 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5833 vdupq_n_s32 (int32_t __a)
5835 return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
5838 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5839 vdupq_n_f32 (float32_t __a)
5841 return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
5844 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5845 vdupq_n_u8 (uint8_t __a)
5847 return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
5850 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5851 vdupq_n_u16 (uint16_t __a)
5853 return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
5856 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5857 vdupq_n_u32 (uint32_t __a)
5859 return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
5862 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5863 vdupq_n_p8 (poly8_t __a)
5865 return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
5868 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
5869 vdupq_n_p16 (poly16_t __a)
5871 return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
5874 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5875 vdupq_n_s64 (int64_t __a)
5877 return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
5880 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5881 vdupq_n_u64 (uint64_t __a)
5883 return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
5886 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5887 vmov_n_s8 (int8_t __a)
5889 return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5892 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5893 vmov_n_s16 (int16_t __a)
5895 return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5898 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5899 vmov_n_s32 (int32_t __a)
5901 return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
5904 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5905 vmov_n_f32 (float32_t __a)
5907 return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
5910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5911 vmov_n_u8 (uint8_t __a)
5913 return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5916 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5917 vmov_n_u16 (uint16_t __a)
5919 return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5923 vmov_n_u32 (uint32_t __a)
5925 return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
5928 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5929 vmov_n_p8 (poly8_t __a)
5931 return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
5934 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5935 vmov_n_p16 (poly16_t __a)
5937 return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
5940 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5941 vmov_n_s64 (int64_t __a)
5943 return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
5946 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5947 vmov_n_u64 (uint64_t __a)
5949 return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
5952 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5953 vmovq_n_s8 (int8_t __a)
5955 return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
5958 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5959 vmovq_n_s16 (int16_t __a)
5961 return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
5964 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5965 vmovq_n_s32 (int32_t __a)
5967 return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
5970 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5971 vmovq_n_f32 (float32_t __a)
5973 return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
5976 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5977 vmovq_n_u8 (uint8_t __a)
5979 return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
5982 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5983 vmovq_n_u16 (uint16_t __a)
5985 return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
5988 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5989 vmovq_n_u32 (uint32_t __a)
5991 return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
5994 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5995 vmovq_n_p8 (poly8_t __a)
5997 return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
6000 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6001 vmovq_n_p16 (poly16_t __a)
6003 return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
6006 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6007 vmovq_n_s64 (int64_t __a)
6009 return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
6012 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6013 vmovq_n_u64 (uint64_t __a)
6015 return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
6018 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6019 vdup_lane_s8 (int8x8_t __a, const int __b)
6021 return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b);
6024 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6025 vdup_lane_s16 (int16x4_t __a, const int __b)
6027 return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b);
6030 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6031 vdup_lane_s32 (int32x2_t __a, const int __b)
6033 return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b);
6036 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6037 vdup_lane_f32 (float32x2_t __a, const int __b)
6039 return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b);
6042 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6043 vdup_lane_u8 (uint8x8_t __a, const int __b)
6045 return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b);
6048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6049 vdup_lane_u16 (uint16x4_t __a, const int __b)
6051 return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b);
6054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6055 vdup_lane_u32 (uint32x2_t __a, const int __b)
6057 return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b);
6060 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6061 vdup_lane_p8 (poly8x8_t __a, const int __b)
6063 return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b);
6066 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6067 vdup_lane_p16 (poly16x4_t __a, const int __b)
6069 return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b);
6072 #pragma GCC push_options
6073 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
6074 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
6075 vdup_lane_p64 (poly64x1_t __a, const int __b)
6077 return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b);
6080 #pragma GCC pop_options
6081 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6082 vdup_lane_s64 (int64x1_t __a, const int __b)
6084 return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b);
6087 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6088 vdup_lane_u64 (uint64x1_t __a, const int __b)
6090 return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b);
6093 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6094 vdupq_lane_s8 (int8x8_t __a, const int __b)
6096 return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b);
6099 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6100 vdupq_lane_s16 (int16x4_t __a, const int __b)
6102 return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b);
6105 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6106 vdupq_lane_s32 (int32x2_t __a, const int __b)
6108 return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b);
6111 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6112 vdupq_lane_f32 (float32x2_t __a, const int __b)
6114 return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b);
6117 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6118 vdupq_lane_u8 (uint8x8_t __a, const int __b)
6120 return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b);
6123 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6124 vdupq_lane_u16 (uint16x4_t __a, const int __b)
6126 return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b);
6129 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6130 vdupq_lane_u32 (uint32x2_t __a, const int __b)
6132 return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b);
6135 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6136 vdupq_lane_p8 (poly8x8_t __a, const int __b)
6138 return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b);
6141 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6142 vdupq_lane_p16 (poly16x4_t __a, const int __b)
6144 return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b);
6147 #pragma GCC push_options
6148 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
6149 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
6150 vdupq_lane_p64 (poly64x1_t __a, const int __b)
6152 return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b);
6155 #pragma GCC pop_options
6156 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6157 vdupq_lane_s64 (int64x1_t __a, const int __b)
6159 return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b);
6162 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6163 vdupq_lane_u64 (uint64x1_t __a, const int __b)
6165 return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b);
6168 #pragma GCC push_options
6169 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
6170 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
6171 vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
6173 return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b);
6176 #pragma GCC pop_options
6177 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6178 vcombine_s8 (int8x8_t __a, int8x8_t __b)
6180 return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b);
6183 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6184 vcombine_s16 (int16x4_t __a, int16x4_t __b)
6186 return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b);
6189 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6190 vcombine_s32 (int32x2_t __a, int32x2_t __b)
6192 return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b);
6195 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6196 vcombine_s64 (int64x1_t __a, int64x1_t __b)
6198 return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
6201 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
6202 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
6203 vcombine_f16 (float16x4_t __a, float16x4_t __b)
6205 return __builtin_neon_vcombinev4hf (__a, __b);
6207 #endif
6209 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6210 vcombine_f32 (float32x2_t __a, float32x2_t __b)
6212 return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b);
6215 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6216 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
6218 return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b);
6221 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6222 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
6224 return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b);
6227 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6228 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
6230 return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b);
6233 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6234 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
6236 return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b);
6239 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6240 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
6242 return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b);
6245 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6246 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
6248 return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b);
6251 #pragma GCC push_options
6252 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
6253 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
6254 vget_high_p64 (poly64x2_t __a)
6256 return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a);
6259 #pragma GCC pop_options
6260 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6261 vget_high_s8 (int8x16_t __a)
6263 return (int8x8_t)__builtin_neon_vget_highv16qi (__a);
6266 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6267 vget_high_s16 (int16x8_t __a)
6269 return (int16x4_t)__builtin_neon_vget_highv8hi (__a);
6272 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6273 vget_high_s32 (int32x4_t __a)
6275 return (int32x2_t)__builtin_neon_vget_highv4si (__a);
6278 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6279 vget_high_s64 (int64x2_t __a)
6281 return (int64x1_t)__builtin_neon_vget_highv2di (__a);
6284 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
6285 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
6286 vget_high_f16 (float16x8_t __a)
6288 return __builtin_neon_vget_highv8hf (__a);
6290 #endif
6292 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6293 vget_high_f32 (float32x4_t __a)
6295 return (float32x2_t)__builtin_neon_vget_highv4sf (__a);
6298 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6299 vget_high_u8 (uint8x16_t __a)
6301 return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a);
6304 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6305 vget_high_u16 (uint16x8_t __a)
6307 return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a);
6310 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6311 vget_high_u32 (uint32x4_t __a)
6313 return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a);
6316 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6317 vget_high_u64 (uint64x2_t __a)
6319 return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a);
6322 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6323 vget_high_p8 (poly8x16_t __a)
6325 return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a);
6328 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6329 vget_high_p16 (poly16x8_t __a)
6331 return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a);
6334 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6335 vget_low_s8 (int8x16_t __a)
6337 return (int8x8_t)__builtin_neon_vget_lowv16qi (__a);
6340 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6341 vget_low_s16 (int16x8_t __a)
6343 return (int16x4_t)__builtin_neon_vget_lowv8hi (__a);
6346 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6347 vget_low_s32 (int32x4_t __a)
6349 return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
6352 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
6353 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
6354 vget_low_f16 (float16x8_t __a)
6356 return __builtin_neon_vget_lowv8hf (__a);
6358 #endif
6360 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6361 vget_low_f32 (float32x4_t __a)
6363 return (float32x2_t)__builtin_neon_vget_lowv4sf (__a);
6366 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6367 vget_low_u8 (uint8x16_t __a)
6369 return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a);
6372 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6373 vget_low_u16 (uint16x8_t __a)
6375 return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a);
6378 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6379 vget_low_u32 (uint32x4_t __a)
6381 return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a);
6384 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6385 vget_low_p8 (poly8x16_t __a)
6387 return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a);
6390 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6391 vget_low_p16 (poly16x8_t __a)
6393 return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a);
6396 #pragma GCC push_options
6397 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
6398 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
6399 vget_low_p64 (poly64x2_t __a)
6401 return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a);
6404 #pragma GCC pop_options
6405 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6406 vget_low_s64 (int64x2_t __a)
6408 return (int64x1_t)__builtin_neon_vget_lowv2di (__a);
6411 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6412 vget_low_u64 (uint64x2_t __a)
6414 return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a);
6417 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6418 vcvt_s32_f32 (float32x2_t __a)
6420 return (int32x2_t)__builtin_neon_vcvtsv2sf (__a);
6423 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6424 vcvt_f32_s32 (int32x2_t __a)
6426 return (float32x2_t)__builtin_neon_vcvtsv2si (__a);
6429 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6430 vcvt_f32_u32 (uint32x2_t __a)
6432 return (float32x2_t)__builtin_neon_vcvtuv2si ((int32x2_t) __a);
6435 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6436 vcvt_u32_f32 (float32x2_t __a)
6438 return (uint32x2_t)__builtin_neon_vcvtuv2sf (__a);
6441 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6442 vcvtq_s32_f32 (float32x4_t __a)
6444 return (int32x4_t)__builtin_neon_vcvtsv4sf (__a);
6447 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6448 vcvtq_f32_s32 (int32x4_t __a)
6450 return (float32x4_t)__builtin_neon_vcvtsv4si (__a);
6453 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6454 vcvtq_f32_u32 (uint32x4_t __a)
6456 return (float32x4_t)__builtin_neon_vcvtuv4si ((int32x4_t) __a);
6459 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6460 vcvtq_u32_f32 (float32x4_t __a)
6462 return (uint32x4_t)__builtin_neon_vcvtuv4sf (__a);
6465 #pragma GCC push_options
6466 #pragma GCC target ("fpu=neon-fp16")
6467 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
6468 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
6469 vcvt_f16_f32 (float32x4_t __a)
6471 return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
6473 #endif
6475 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
6476 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6477 vcvt_f32_f16 (float16x4_t __a)
6479 return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
6481 #endif
6482 #pragma GCC pop_options
6484 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6485 vcvt_n_s32_f32 (float32x2_t __a, const int __b)
6487 return (int32x2_t)__builtin_neon_vcvts_nv2sf (__a, __b);
6490 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6491 vcvt_n_f32_s32 (int32x2_t __a, const int __b)
6493 return (float32x2_t)__builtin_neon_vcvts_nv2si (__a, __b);
6496 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6497 vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
6499 return (float32x2_t)__builtin_neon_vcvtu_nv2si ((int32x2_t) __a, __b);
6502 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6503 vcvt_n_u32_f32 (float32x2_t __a, const int __b)
6505 return (uint32x2_t)__builtin_neon_vcvtu_nv2sf (__a, __b);
6508 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6509 vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
6511 return (int32x4_t)__builtin_neon_vcvts_nv4sf (__a, __b);
6514 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6515 vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
6517 return (float32x4_t)__builtin_neon_vcvts_nv4si (__a, __b);
6520 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6521 vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
6523 return (float32x4_t)__builtin_neon_vcvtu_nv4si ((int32x4_t) __a, __b);
6526 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6527 vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
6529 return (uint32x4_t)__builtin_neon_vcvtu_nv4sf (__a, __b);
6532 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6533 vmovn_s16 (int16x8_t __a)
6535 return (int8x8_t)__builtin_neon_vmovnv8hi (__a);
6538 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6539 vmovn_s32 (int32x4_t __a)
6541 return (int16x4_t)__builtin_neon_vmovnv4si (__a);
6544 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6545 vmovn_s64 (int64x2_t __a)
6547 return (int32x2_t)__builtin_neon_vmovnv2di (__a);
6550 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6551 vmovn_u16 (uint16x8_t __a)
6553 return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a);
6556 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6557 vmovn_u32 (uint32x4_t __a)
6559 return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a);
6562 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6563 vmovn_u64 (uint64x2_t __a)
6565 return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a);
6568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6569 vqmovn_s16 (int16x8_t __a)
6571 return (int8x8_t)__builtin_neon_vqmovnsv8hi (__a);
6574 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6575 vqmovn_s32 (int32x4_t __a)
6577 return (int16x4_t)__builtin_neon_vqmovnsv4si (__a);
6580 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6581 vqmovn_s64 (int64x2_t __a)
6583 return (int32x2_t)__builtin_neon_vqmovnsv2di (__a);
6586 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6587 vqmovn_u16 (uint16x8_t __a)
6589 return (uint8x8_t)__builtin_neon_vqmovnuv8hi ((int16x8_t) __a);
6592 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6593 vqmovn_u32 (uint32x4_t __a)
6595 return (uint16x4_t)__builtin_neon_vqmovnuv4si ((int32x4_t) __a);
6598 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6599 vqmovn_u64 (uint64x2_t __a)
6601 return (uint32x2_t)__builtin_neon_vqmovnuv2di ((int64x2_t) __a);
6604 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6605 vqmovun_s16 (int16x8_t __a)
6607 return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a);
6610 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6611 vqmovun_s32 (int32x4_t __a)
6613 return (uint16x4_t)__builtin_neon_vqmovunv4si (__a);
6616 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6617 vqmovun_s64 (int64x2_t __a)
6619 return (uint32x2_t)__builtin_neon_vqmovunv2di (__a);
6622 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6623 vmovl_s8 (int8x8_t __a)
6625 return (int16x8_t)__builtin_neon_vmovlsv8qi (__a);
6628 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6629 vmovl_s16 (int16x4_t __a)
6631 return (int32x4_t)__builtin_neon_vmovlsv4hi (__a);
6634 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6635 vmovl_s32 (int32x2_t __a)
6637 return (int64x2_t)__builtin_neon_vmovlsv2si (__a);
6640 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6641 vmovl_u8 (uint8x8_t __a)
6643 return (uint16x8_t)__builtin_neon_vmovluv8qi ((int8x8_t) __a);
6646 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6647 vmovl_u16 (uint16x4_t __a)
6649 return (uint32x4_t)__builtin_neon_vmovluv4hi ((int16x4_t) __a);
6652 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6653 vmovl_u32 (uint32x2_t __a)
6655 return (uint64x2_t)__builtin_neon_vmovluv2si ((int32x2_t) __a);
6658 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6659 vtbl1_s8 (int8x8_t __a, int8x8_t __b)
6661 return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b);
6664 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6665 vtbl1_u8 (uint8x8_t __a, uint8x8_t __b)
6667 return (uint8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
6670 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6671 vtbl1_p8 (poly8x8_t __a, uint8x8_t __b)
6673 return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
6676 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6677 vtbl2_s8 (int8x8x2_t __a, int8x8_t __b)
6679 union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
6680 return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b);
6683 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6684 vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b)
6686 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
6687 return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
6690 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6691 vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b)
6693 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
6694 return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
6697 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6698 vtbl3_s8 (int8x8x3_t __a, int8x8_t __b)
6700 union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
6701 return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b);
6704 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6705 vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b)
6707 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
6708 return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
6711 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6712 vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b)
6714 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
6715 return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
6718 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6719 vtbl4_s8 (int8x8x4_t __a, int8x8_t __b)
6721 union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
6722 return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b);
6725 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6726 vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b)
6728 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
6729 return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
6732 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6733 vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b)
6735 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
6736 return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
6739 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6740 vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
6742 return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c);
6745 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6746 vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
6748 return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
6751 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6752 vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c)
6754 return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
6757 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6758 vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c)
6760 union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
6761 return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c);
6764 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6765 vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c)
6767 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
6768 return (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6771 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6772 vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c)
6774 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
6775 return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6778 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6779 vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c)
6781 union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
6782 return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c);
6785 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6786 vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c)
6788 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
6789 return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6792 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6793 vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c)
6795 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
6796 return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6799 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6800 vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c)
6802 union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
6803 return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c);
6806 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6807 vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c)
6809 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
6810 return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6813 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6814 vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c)
6816 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
6817 return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
6820 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6821 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
6823 return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c);
6826 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6827 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
6829 return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c);
6832 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6833 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c)
6835 return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c);
6838 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6839 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
6841 return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
6844 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6845 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
6847 return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c);
6850 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6851 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
6853 return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c);
6856 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6857 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
6859 return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c);
6862 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6863 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c)
6865 return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c);
6868 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6869 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
6871 return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c);
6874 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6875 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
6877 return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c);
6880 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6881 vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
6883 return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d);
6886 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6887 vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
6889 return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d);
6892 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6893 vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
6895 return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d);
6898 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6899 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
6901 return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
6904 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6905 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
6907 return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
6910 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6911 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
6913 return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d);
6916 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6917 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
6919 return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d);
6922 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6923 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
6925 return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d);
6928 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6929 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
6931 return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d);
6934 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6935 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
6937 return (uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d);
6940 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6941 vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
6943 return (int32x4_t)__builtin_neon_vmlals_lanev4hi (__a, __b, __c, __d);
6946 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6947 vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
6949 return (int64x2_t)__builtin_neon_vmlals_lanev2si (__a, __b, __c, __d);
6952 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6953 vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
6955 return (uint32x4_t)__builtin_neon_vmlalu_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
6958 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6959 vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
6961 return (uint64x2_t)__builtin_neon_vmlalu_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
6964 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6965 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
6967 return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d);
6970 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6971 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
6973 return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d);
6976 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6977 vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
6979 return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d);
6982 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6983 vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
6985 return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d);
6988 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6989 vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
6991 return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d);
6994 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6995 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
6997 return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
7000 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7001 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
7003 return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
7006 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7007 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
7009 return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d);
7012 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7013 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
7015 return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d);
7018 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7019 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
7021 return (float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d);
7024 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7025 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
7027 return (uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d);
7030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7031 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
7033 return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d);
7036 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7037 vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
7039 return (int32x4_t)__builtin_neon_vmlsls_lanev4hi (__a, __b, __c, __d);
7042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7043 vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
7045 return (int64x2_t)__builtin_neon_vmlsls_lanev2si (__a, __b, __c, __d);
7048 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7049 vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
7051 return (uint32x4_t)__builtin_neon_vmlslu_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
7054 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7055 vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
7057 return (uint64x2_t)__builtin_neon_vmlslu_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
7060 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7061 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
7063 return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d);
7066 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7067 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
7069 return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d);
7072 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7073 vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
7075 return (int32x4_t)__builtin_neon_vmulls_lanev4hi (__a, __b, __c);
7078 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7079 vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
7081 return (int64x2_t)__builtin_neon_vmulls_lanev2si (__a, __b, __c);
7084 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7085 vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
7087 return (uint32x4_t)__builtin_neon_vmullu_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
7090 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7091 vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
7093 return (uint64x2_t)__builtin_neon_vmullu_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c);
7096 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7097 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
7099 return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c);
7102 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7103 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
7105 return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c);
7108 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7109 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
7111 return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c);
7114 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7115 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
7117 return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c);
7120 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7121 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
7123 return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c);
7126 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7127 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
7129 return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c);
7132 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7133 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
7135 return (int16x8_t)__builtin_neon_vqrdmulh_lanev8hi (__a, __b, __c);
7138 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7139 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
7141 return (int32x4_t)__builtin_neon_vqrdmulh_lanev4si (__a, __b, __c);
7144 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7145 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
7147 return (int16x4_t)__builtin_neon_vqrdmulh_lanev4hi (__a, __b, __c);
7150 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7151 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
7153 return (int32x2_t)__builtin_neon_vqrdmulh_lanev2si (__a, __b, __c);
/* Lane forms of vqrdmlah / vqrdmlsh.  These are only defined when the
   compiler predefines __ARM_FEATURE_QRDMX.  Each wrapper forwards its
   four arguments, uncast, to the matching __builtin_neon_vqrdmlah_lane*
   or __builtin_neon_vqrdmlsh_lane* builtin; __d is the final argument
   (presumably the lane of __c -- confirm).  */
#ifdef __ARM_FEATURE_QRDMX
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return (int16x8_t)__builtin_neon_vqrdmlah_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return (int32x4_t)__builtin_neon_vqrdmlah_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return (int16x4_t)__builtin_neon_vqrdmlah_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return (int32x2_t)__builtin_neon_vqrdmlah_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
{
  return (int16x8_t)__builtin_neon_vqrdmlsh_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
{
  return (int32x4_t)__builtin_neon_vqrdmlsh_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return (int16x4_t)__builtin_neon_vqrdmlsh_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return (int32x2_t)__builtin_neon_vqrdmlsh_lanev2si (__a, __b, __c, __d);
}
#endif
7206 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7207 vmul_n_s16 (int16x4_t __a, int16_t __b)
7209 return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b);
7212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7213 vmul_n_s32 (int32x2_t __a, int32_t __b)
7215 return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b);
7218 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7219 vmul_n_f32 (float32x2_t __a, float32_t __b)
7221 return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b);
7224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7225 vmul_n_u16 (uint16x4_t __a, uint16_t __b)
7227 return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b);
7230 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7231 vmul_n_u32 (uint32x2_t __a, uint32_t __b)
7233 return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b);
7236 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7237 vmulq_n_s16 (int16x8_t __a, int16_t __b)
7239 return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b);
7242 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7243 vmulq_n_s32 (int32x4_t __a, int32_t __b)
7245 return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b);
7248 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7249 vmulq_n_f32 (float32x4_t __a, float32_t __b)
7251 return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b);
7254 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7255 vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
7257 return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b);
7260 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7261 vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
7263 return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b);
7266 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7267 vmull_n_s16 (int16x4_t __a, int16_t __b)
7269 return (int32x4_t)__builtin_neon_vmulls_nv4hi (__a, (__builtin_neon_hi) __b);
7272 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7273 vmull_n_s32 (int32x2_t __a, int32_t __b)
7275 return (int64x2_t)__builtin_neon_vmulls_nv2si (__a, (__builtin_neon_si) __b);
7278 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7279 vmull_n_u16 (uint16x4_t __a, uint16_t __b)
7281 return (uint32x4_t)__builtin_neon_vmullu_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b);
7284 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7285 vmull_n_u32 (uint32x2_t __a, uint32_t __b)
7287 return (uint64x2_t)__builtin_neon_vmullu_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b);
7290 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7291 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
7293 return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b);
7296 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7297 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
7299 return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b);
7302 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7303 vqdmulhq_n_s16 (int16x8_t __a, int16_t __b)
7305 return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b);
7308 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7309 vqdmulhq_n_s32 (int32x4_t __a, int32_t __b)
7311 return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b);
7314 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7315 vqdmulh_n_s16 (int16x4_t __a, int16_t __b)
7317 return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b);
7320 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7321 vqdmulh_n_s32 (int32x2_t __a, int32_t __b)
7323 return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b);
7326 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7327 vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
7329 return (int16x8_t)__builtin_neon_vqrdmulh_nv8hi (__a, (__builtin_neon_hi) __b);
7332 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7333 vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
7335 return (int32x4_t)__builtin_neon_vqrdmulh_nv4si (__a, (__builtin_neon_si) __b);
7338 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7339 vqrdmulh_n_s16 (int16x4_t __a, int16_t __b)
7341 return (int16x4_t)__builtin_neon_vqrdmulh_nv4hi (__a, (__builtin_neon_hi) __b);
7344 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7345 vqrdmulh_n_s32 (int32x2_t __a, int32_t __b)
7347 return (int32x2_t)__builtin_neon_vqrdmulh_nv2si (__a, (__builtin_neon_si) __b);
7350 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7351 vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c)
7353 return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7356 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7357 vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
7359 return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c);
7362 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7363 vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
7365 return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c);
7368 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7369 vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
7371 return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c);
7374 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7375 vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
7377 return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c);
7380 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7381 vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
7383 return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c);
7386 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7387 vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
7389 return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c);
7392 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7393 vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
7395 return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c);
7398 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7399 vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
7401 return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c);
7404 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7405 vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
7407 return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c);
7410 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7411 vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
7413 return (int32x4_t)__builtin_neon_vmlals_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7416 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7417 vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
7419 return (int64x2_t)__builtin_neon_vmlals_nv2si (__a, __b, (__builtin_neon_si) __c);
7422 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7423 vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c)
7425 return (uint32x4_t)__builtin_neon_vmlalu_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c);
7428 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7429 vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c)
7431 return (uint64x2_t)__builtin_neon_vmlalu_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c);
7434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7435 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
7437 return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7440 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7441 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
7443 return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c);
7446 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7447 vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c)
7449 return (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7452 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7453 vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
7455 return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c);
7458 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7459 vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
7461 return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c);
7464 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7465 vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
7467 return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c);
7470 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7471 vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
7473 return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c);
7476 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7477 vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
7479 return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c);
7482 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7483 vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
7485 return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c);
7488 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7489 vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
7491 return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c);
7494 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7495 vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
7497 return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c);
7500 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7501 vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
7503 return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c);
7506 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7507 vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
7509 return (int32x4_t)__builtin_neon_vmlsls_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7513 vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
7515 return (int64x2_t)__builtin_neon_vmlsls_nv2si (__a, __b, (__builtin_neon_si) __c);
7518 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7519 vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c)
7521 return (uint32x4_t)__builtin_neon_vmlslu_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c);
7524 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7525 vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c)
7527 return (uint64x2_t)__builtin_neon_vmlslu_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c);
7530 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7531 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
7533 return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c);
7536 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7537 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
7539 return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c);
7542 #pragma GCC push_options
7543 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
7544 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
7545 vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
7547 return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c);
7550 #pragma GCC pop_options
7551 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7552 vext_s8 (int8x8_t __a, int8x8_t __b, const int __c)
7554 return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c);
7557 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7558 vext_s16 (int16x4_t __a, int16x4_t __b, const int __c)
7560 return (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c);
7563 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7564 vext_s32 (int32x2_t __a, int32x2_t __b, const int __c)
7566 return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c);
7569 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7570 vext_s64 (int64x1_t __a, int64x1_t __b, const int __c)
7572 return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c);
7575 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7576 vext_f32 (float32x2_t __a, float32x2_t __b, const int __c)
7578 return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c);
7581 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7582 vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
7584 return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
7587 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7588 vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
7590 return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
7593 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7594 vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
7596 return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
7599 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7600 vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
7602 return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c);
7605 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7606 vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
7608 return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
7611 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7612 vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
7614 return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
7617 #pragma GCC push_options
7618 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
7619 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
7620 vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
7622 return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
7625 #pragma GCC pop_options
7626 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7627 vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c)
7629 return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c);
7632 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7633 vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
7635 return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c);
7638 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7639 vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
7641 return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c);
7644 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7645 vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c)
7647 return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c);
7650 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7651 vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c)
7653 return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c);
7656 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7657 vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
7659 return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
7662 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7663 vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
7665 return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
7668 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7669 vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
7671 return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
7674 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7675 vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
7677 return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
7680 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7681 vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
7683 return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
7686 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7687 vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
7689 return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
7692 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7693 vrev64_s8 (int8x8_t __a)
7695 return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
7698 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7699 vrev64_s16 (int16x4_t __a)
7701 return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
7704 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7705 vrev64_s32 (int32x2_t __a)
7707 return (int32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
7710 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7711 vrev64_f32 (float32x2_t __a)
7713 return (float32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
7716 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7717 vrev64_u8 (uint8x8_t __a)
7719 return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
7722 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7723 vrev64_u16 (uint16x4_t __a)
7725 return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
7728 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7729 vrev64_u32 (uint32x2_t __a)
7731 return (uint32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 });
7734 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7735 vrev64_p8 (poly8x8_t __a)
7737 return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
7740 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7741 vrev64_p16 (poly16x4_t __a)
7743 return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
7746 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7747 vrev64q_s8 (int8x16_t __a)
7749 return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
7752 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7753 vrev64q_s16 (int16x8_t __a)
7755 return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
7758 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7759 vrev64q_s32 (int32x4_t __a)
7761 return (int32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
7764 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7765 vrev64q_f32 (float32x4_t __a)
7767 return (float32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
7770 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7771 vrev64q_u8 (uint8x16_t __a)
7773 return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
7776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7777 vrev64q_u16 (uint16x8_t __a)
7779 return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
7782 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7783 vrev64q_u32 (uint32x4_t __a)
7785 return (uint32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 });
7788 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7789 vrev64q_p8 (poly8x16_t __a)
7791 return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
7794 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7795 vrev64q_p16 (poly16x8_t __a)
7797 return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
7800 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7801 vrev32_s8 (int8x8_t __a)
7803 return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
7806 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7807 vrev32_s16 (int16x4_t __a)
7809 return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
7812 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7813 vrev32_u8 (uint8x8_t __a)
7815 return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
7818 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7819 vrev32_u16 (uint16x4_t __a)
7821 return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
/* Polynomial variant: reverse lanes 0-3 and lanes 4-7 of __a separately.  The
   index mask is a uint8x8_t; the result is cast back to poly8x8_t.  */
7824 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7825 vrev32_p8 (poly8x8_t __a)
7827 return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
/* Polynomial variant: swap lanes 0/1 and lanes 2/3 of __a.  The index mask is
   a uint16x4_t; the result is cast back to poly16x4_t.  */
7830 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7831 vrev32_p16 (poly16x4_t __a)
7833 return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 });
/* Return __a with its 16 byte lanes permuted so that every group of four
   consecutive lanes (0-3, 4-7, 8-11, 12-15) is reversed in place.  The mask is
   unsigned; the result is cast back to int8x16_t.  */
7836 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7837 vrev32q_s8 (int8x16_t __a)
7839 return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
/* Return __a with every adjacent pair of its 8 halfword lanes swapped
   (0<->1, 2<->3, 4<->5, 6<->7).  The mask is unsigned; the result is cast back
   to int16x8_t.  */
7842 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7843 vrev32q_s16 (int16x8_t __a)
7845 return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Return __a with its 16 byte lanes permuted so that every group of four
   consecutive lanes is reversed in place.  */
7848 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7849 vrev32q_u8 (uint8x16_t __a)
7851 return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
/* Return __a with every adjacent pair of its 8 halfword lanes swapped.  */
7854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7855 vrev32q_u16 (uint16x8_t __a)
7857 return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Polynomial variant: reverse every group of four consecutive lanes of __a.
   The index mask is a uint8x16_t; the result is cast back to poly8x16_t.  */
7860 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7861 vrev32q_p8 (poly8x16_t __a)
7863 return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
/* Polynomial variant: swap every adjacent pair of the 8 lanes of __a.  The
   index mask is a uint16x8_t; the result is cast back to poly16x8_t.  */
7866 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7867 vrev32q_p16 (poly16x8_t __a)
7869 return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Return __a with every adjacent pair of its 8 byte lanes swapped
   (0<->1, 2<->3, 4<->5, 6<->7).  The mask is unsigned; the result is cast back
   to int8x8_t.  */
7872 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7873 vrev16_s8 (int8x8_t __a)
7875 return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Return __a with every adjacent pair of its 8 byte lanes swapped.  */
7878 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7879 vrev16_u8 (uint8x8_t __a)
7881 return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Polynomial variant: swap every adjacent pair of the 8 lanes of __a.  The
   index mask is a uint8x8_t; the result is cast back to poly8x8_t.  */
7884 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7885 vrev16_p8 (poly8x8_t __a)
7887 return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
/* Return __a with every adjacent pair of its 16 byte lanes swapped.  The mask
   is unsigned; the result is cast back to int8x16_t.  */
7890 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7891 vrev16q_s8 (int8x16_t __a)
7893 return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
/* Return __a with every adjacent pair of its 16 byte lanes swapped.  */
7896 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7897 vrev16q_u8 (uint8x16_t __a)
7899 return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
/* Polynomial variant: swap every adjacent pair of the 16 lanes of __a.  The
   index mask is a uint8x16_t; the result is cast back to poly8x16_t.  */
7902 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7903 vrev16q_p8 (poly8x16_t __a)
7905 return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
/* vbsl_p64 is compiled with the target temporarily switched to
   fpu=crypto-neon-fp-armv8; the previous options are restored by the
   pop_options pragma that follows the definition.
   The function calls __builtin_neon_vbsldi with the selector __a cast from
   uint64x1_t to int64x1_t and with __b and __c passed without a cast, then
   casts the result to poly64x1_t.
   NOTE(review): presumably the VBSL bitwise select (result bit comes from __b
   where the __a bit is 1, otherwise from __c); the builtin's definition is not
   visible in this part of the file -- confirm.  */
7908 #pragma GCC push_options
7909 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
7910 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
7911 vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
7913 return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c);
7916 #pragma GCC pop_options
/* Call __builtin_neon_vbslv8qi with the selector __a cast from uint8x8_t to
   int8x8_t and with __b and __c passed unchanged; the result is cast to
   int8x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7917 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7918 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
7920 return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c);
/* Call __builtin_neon_vbslv4hi with the selector __a cast from uint16x4_t to
   int16x4_t and with __b and __c passed unchanged; the result is cast to
   int16x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7923 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7924 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
7926 return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c);
/* Call __builtin_neon_vbslv2si with the selector __a cast from uint32x2_t to
   int32x2_t and with __b and __c passed unchanged; the result is cast to
   int32x2_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7929 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7930 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
7932 return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c);
/* Call __builtin_neon_vbsldi with the selector __a cast from uint64x1_t to
   int64x1_t and with __b and __c passed unchanged; the result is cast to
   int64x1_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7935 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7936 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
7938 return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c);
/* Floating-point variant: call __builtin_neon_vbslv2sf with the selector __a
   cast from uint32x2_t to int32x2_t and with the float vectors __b and __c
   passed unchanged; the result is cast to float32x2_t.
   NOTE(review): presumably a bitwise (not per-lane arithmetic) select of
   __b/__c under mask __a; the builtin is defined outside this chunk --
   confirm.  */
7941 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7942 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
7944 return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c);
/* Unsigned variant: all three operands are cast from uint8x8_t to int8x8_t
   before calling __builtin_neon_vbslv8qi, and the result is cast back to
   uint8x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7947 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7948 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
7950 return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
/* Unsigned variant: all three operands are cast from uint16x4_t to int16x4_t
   before calling __builtin_neon_vbslv4hi, and the result is cast back to
   uint16x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7953 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7954 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
7956 return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
/* Unsigned variant: all three operands are cast from uint32x2_t to int32x2_t
   before calling __builtin_neon_vbslv2si, and the result is cast back to
   uint32x2_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7959 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7960 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
7962 return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
/* Unsigned variant: all three operands are cast from uint64x1_t to int64x1_t
   before calling __builtin_neon_vbsldi, and the result is cast back to
   uint64x1_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7965 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7966 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
7968 return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c);
/* Polynomial variant: the selector __a (uint8x8_t) and the poly8x8_t operands
   __b and __c are all cast to int8x8_t before calling __builtin_neon_vbslv8qi;
   the result is cast to poly8x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7971 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7972 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
7974 return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
/* Polynomial variant: the selector __a (uint16x4_t) and the poly16x4_t
   operands __b and __c are all cast to int16x4_t before calling
   __builtin_neon_vbslv4hi; the result is cast to poly16x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7977 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7978 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
7980 return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
/* vbslq_p64 is compiled with the target temporarily switched to
   fpu=crypto-neon-fp-armv8; the previous options are restored by the
   pop_options pragma that follows the definition.
   The function casts __a, __b and __c to int64x2_t, calls
   __builtin_neon_vbslv2di, and casts the result to poly64x2_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7983 #pragma GCC push_options
7984 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
7985 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
7986 vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
7988 return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c);
7991 #pragma GCC pop_options
/* 128-bit variant: call __builtin_neon_vbslv16qi with the selector __a cast
   from uint8x16_t to int8x16_t and with __b and __c passed unchanged; the
   result is cast to int8x16_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7992 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7993 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
7995 return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c);
/* 128-bit variant: call __builtin_neon_vbslv8hi with the selector __a cast
   from uint16x8_t to int16x8_t and with __b and __c passed unchanged; the
   result is cast to int16x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
7998 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7999 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
8001 return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c);
/* 128-bit variant: call __builtin_neon_vbslv4si with the selector __a cast
   from uint32x4_t to int32x4_t and with __b and __c passed unchanged; the
   result is cast to int32x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8004 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8005 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
8007 return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c);
/* 128-bit variant: call __builtin_neon_vbslv2di with the selector __a cast
   from uint64x2_t to int64x2_t and with __b and __c passed unchanged; the
   result is cast to int64x2_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8010 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8011 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
8013 return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c);
/* 128-bit floating-point variant: call __builtin_neon_vbslv4sf with the
   selector __a cast from uint32x4_t to int32x4_t and with the float vectors
   __b and __c passed unchanged; the result is cast to float32x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8016 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8017 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
8019 return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c);
/* 128-bit unsigned variant: all three operands are cast from uint8x16_t to
   int8x16_t before calling __builtin_neon_vbslv16qi, and the result is cast
   back to uint8x16_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8022 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8023 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
8025 return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
/* 128-bit unsigned variant: all three operands are cast from uint16x8_t to
   int16x8_t before calling __builtin_neon_vbslv8hi, and the result is cast
   back to uint16x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8028 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8029 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
8031 return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
/* 128-bit unsigned variant: all three operands are cast from uint32x4_t to
   int32x4_t before calling __builtin_neon_vbslv4si, and the result is cast
   back to uint32x4_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8034 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8035 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
8037 return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
/* 128-bit unsigned variant: all three operands are cast from uint64x2_t to
   int64x2_t before calling __builtin_neon_vbslv2di, and the result is cast
   back to uint64x2_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8040 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8041 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
8043 return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c);
/* 128-bit polynomial variant: the selector __a (uint8x16_t) and the
   poly8x16_t operands __b and __c are all cast to int8x16_t before calling
   __builtin_neon_vbslv16qi; the result is cast to poly8x16_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8046 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8047 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
8049 return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
/* 128-bit polynomial variant: the selector __a (uint16x8_t) and the
   poly16x8_t operands __b and __c are all cast to int16x8_t before calling
   __builtin_neon_vbslv8hi; the result is cast to poly16x8_t.
   NOTE(review): presumably a bitwise select of __b/__c under mask __a; the
   builtin is defined outside this chunk -- confirm.  */
8052 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8053 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
8055 return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
8058 /* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as
8059 follows. (nelt = the number of elements within a vector.)
8061 Firstly, a value of N within a mask becomes (N ^ (nelt - 1)), as gcc vector
8062 extension's indexing scheme is reversed *within each vector* (relative to the
8063 neon intrinsics view), but without changing which of the two vectors.
8065 Secondly, the elements within each mask are reversed, as the mask is itself a
8066 vector, and will itself be loaded in reverse order (again, relative to the
8067 neon intrinsics view, i.e. that would result from a "vld1" instruction). */
/* Build both halves of a lane transpose of __a and __b with two two-input
   shuffles and return them as __rv.val[0] / __rv.val[1].
   In the #else (little-endian) branch, writing aN/bN for lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8069 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
8070 vtrn_s8 (int8x8_t __a, int8x8_t __b)
8072 int8x8x2_t __rv;
8073 #ifdef __ARM_BIG_ENDIAN
8074 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8075 { 9, 1, 11, 3, 13, 5, 15, 7 });
8076 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8077 { 8, 0, 10, 2, 12, 4, 14, 6 });
8078 #else
8079 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8080 { 0, 8, 2, 10, 4, 12, 6, 14 });
8081 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8082 { 1, 9, 3, 11, 5, 13, 7, 15 });
8083 #endif
8084 return __rv;
/* Build both halves of a lane transpose of __a and __b and return them as a
   pair.  In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8087 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
8088 vtrn_s16 (int16x4_t __a, int16x4_t __b)
8090 int16x4x2_t __rv;
8091 #ifdef __ARM_BIG_ENDIAN
8092 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
8093 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
8094 #else
8095 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
8096 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
8097 #endif
8098 return __rv;
/* Unsigned 8-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8101 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
8102 vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
8104 uint8x8x2_t __rv;
8105 #ifdef __ARM_BIG_ENDIAN
8106 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8107 { 9, 1, 11, 3, 13, 5, 15, 7 });
8108 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8109 { 8, 0, 10, 2, 12, 4, 14, 6 });
8110 #else
8111 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8112 { 0, 8, 2, 10, 4, 12, 6, 14 });
8113 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8114 { 1, 9, 3, 11, 5, 13, 7, 15 });
8115 #endif
8116 return __rv;
/* Unsigned 4-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8119 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
8120 vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
8122 uint16x4x2_t __rv;
8123 #ifdef __ARM_BIG_ENDIAN
8124 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
8125 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
8126 #else
8127 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
8128 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
8129 #endif
8130 return __rv;
/* Polynomial 8-lane transpose of __a and __b, returned as a pair; the index
   masks are uint8x8_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8133 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
8134 vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
8136 poly8x8x2_t __rv;
8137 #ifdef __ARM_BIG_ENDIAN
8138 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8139 { 9, 1, 11, 3, 13, 5, 15, 7 });
8140 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8141 { 8, 0, 10, 2, 12, 4, 14, 6 });
8142 #else
8143 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8144 { 0, 8, 2, 10, 4, 12, 6, 14 });
8145 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8146 { 1, 9, 3, 11, 5, 13, 7, 15 });
8147 #endif
8148 return __rv;
/* Polynomial 4-lane transpose of __a and __b, returned as a pair; the index
   masks are uint16x4_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8151 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
8152 vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
8154 poly16x4x2_t __rv;
8155 #ifdef __ARM_BIG_ENDIAN
8156 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
8157 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
8158 #else
8159 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
8160 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
8161 #endif
8162 return __rv;
/* Two-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8165 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
8166 vtrn_s32 (int32x2_t __a, int32x2_t __b)
8168 int32x2x2_t __rv;
8169 #ifdef __ARM_BIG_ENDIAN
8170 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8171 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8172 #else
8173 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8174 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8175 #endif
8176 return __rv;
/* Two-lane float transpose of __a and __b, returned as a pair; the index
   masks are uint32x2_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8179 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
8180 vtrn_f32 (float32x2_t __a, float32x2_t __b)
8182 float32x2x2_t __rv;
8183 #ifdef __ARM_BIG_ENDIAN
8184 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8185 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8186 #else
8187 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8188 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8189 #endif
8190 return __rv;
/* Unsigned two-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8193 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
8194 vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
8196 uint32x2x2_t __rv;
8197 #ifdef __ARM_BIG_ENDIAN
8198 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8199 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8200 #else
8201 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8202 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8203 #endif
8204 return __rv;
/* 16-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a2, b2, ..., a14, b14 }  (even lanes)
     val[1] = { a1, b1, a3, b3, ..., a15, b15 }  (odd lanes)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8207 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
8208 vtrnq_s8 (int8x16_t __a, int8x16_t __b)
8210 int8x16x2_t __rv;
8211 #ifdef __ARM_BIG_ENDIAN
8212 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8213 { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
8214 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8215 { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
8216 #else
8217 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8218 { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
8219 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8220 { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
8221 #endif
8222 return __rv;
/* 8-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8225 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
8226 vtrnq_s16 (int16x8_t __a, int16x8_t __b)
8228 int16x8x2_t __rv;
8229 #ifdef __ARM_BIG_ENDIAN
8230 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8231 { 9, 1, 11, 3, 13, 5, 15, 7 });
8232 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8233 { 8, 0, 10, 2, 12, 4, 14, 6 });
8234 #else
8235 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8236 { 0, 8, 2, 10, 4, 12, 6, 14 });
8237 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8238 { 1, 9, 3, 11, 5, 13, 7, 15 });
8239 #endif
8240 return __rv;
/* 4-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8243 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
8244 vtrnq_s32 (int32x4_t __a, int32x4_t __b)
8246 int32x4x2_t __rv;
8247 #ifdef __ARM_BIG_ENDIAN
8248 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
8249 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
8250 #else
8251 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
8252 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
8253 #endif
8254 return __rv;
/* 4-lane float transpose of __a and __b, returned as a pair; the index masks
   are uint32x4_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8257 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
8258 vtrnq_f32 (float32x4_t __a, float32x4_t __b)
8260 float32x4x2_t __rv;
8261 #ifdef __ARM_BIG_ENDIAN
8262 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
8263 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
8264 #else
8265 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
8266 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
8267 #endif
8268 return __rv;
/* Unsigned 16-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a2, b2, ..., a14, b14 }  (even lanes)
     val[1] = { a1, b1, a3, b3, ..., a15, b15 }  (odd lanes)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8271 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
8272 vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
8274 uint8x16x2_t __rv;
8275 #ifdef __ARM_BIG_ENDIAN
8276 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8277 { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
8278 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8279 { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
8280 #else
8281 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8282 { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
8283 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8284 { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
8285 #endif
8286 return __rv;
/* Unsigned 8-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8289 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
8290 vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
8292 uint16x8x2_t __rv;
8293 #ifdef __ARM_BIG_ENDIAN
8294 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8295 { 9, 1, 11, 3, 13, 5, 15, 7 });
8296 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8297 { 8, 0, 10, 2, 12, 4, 14, 6 });
8298 #else
8299 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8300 { 0, 8, 2, 10, 4, 12, 6, 14 });
8301 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8302 { 1, 9, 3, 11, 5, 13, 7, 15 });
8303 #endif
8304 return __rv;
/* Unsigned 4-lane transpose of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a2, b2 }
     val[1] = { a1, b1, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8307 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
8308 vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
8310 uint32x4x2_t __rv;
8311 #ifdef __ARM_BIG_ENDIAN
8312 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
8313 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
8314 #else
8315 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
8316 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
8317 #endif
8318 return __rv;
/* Polynomial 16-lane transpose of __a and __b, returned as a pair; the index
   masks are uint8x16_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a2, b2, ..., a14, b14 }  (even lanes)
     val[1] = { a1, b1, a3, b3, ..., a15, b15 }  (odd lanes)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8321 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
8322 vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
8324 poly8x16x2_t __rv;
8325 #ifdef __ARM_BIG_ENDIAN
8326 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8327 { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
8328 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8329 { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
8330 #else
8331 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8332 { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
8333 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8334 { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
8335 #endif
8336 return __rv;
/* Polynomial 8-lane transpose of __a and __b, returned as a pair; the index
   masks are uint16x8_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a2, b2, a4, b4, a6, b6 }
     val[1] = { a1, b1, a3, b3, a5, b5, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8339 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
8340 vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
8342 poly16x8x2_t __rv;
8343 #ifdef __ARM_BIG_ENDIAN
8344 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8345 { 9, 1, 11, 3, 13, 5, 15, 7 });
8346 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8347 { 8, 0, 10, 2, 12, 4, 14, 6 });
8348 #else
8349 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8350 { 0, 8, 2, 10, 4, 12, 6, 14 });
8351 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8352 { 1, 9, 3, 11, 5, 13, 7, 15 });
8353 #endif
8354 return __rv;
/* Interleave (zip) the lanes of __a and __b and return the result as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }  (low halves)
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }  (high halves)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8357 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
8358 vzip_s8 (int8x8_t __a, int8x8_t __b)
8360 int8x8x2_t __rv;
8361 #ifdef __ARM_BIG_ENDIAN
8362 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8363 { 12, 4, 13, 5, 14, 6, 15, 7 });
8364 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8365 { 8, 0, 9, 1, 10, 2, 11, 3 });
8366 #else
8367 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8368 { 0, 8, 1, 9, 2, 10, 3, 11 });
8369 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8370 { 4, 12, 5, 13, 6, 14, 7, 15 });
8371 #endif
8372 return __rv;
/* Interleave (zip) the 4 lanes of __a and __b and return the result as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8375 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
8376 vzip_s16 (int16x4_t __a, int16x4_t __b)
8378 int16x4x2_t __rv;
8379 #ifdef __ARM_BIG_ENDIAN
8380 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
8381 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
8382 #else
8383 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
8384 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
8385 #endif
8386 return __rv;
/* Unsigned 8-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8389 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
8390 vzip_u8 (uint8x8_t __a, uint8x8_t __b)
8392 uint8x8x2_t __rv;
8393 #ifdef __ARM_BIG_ENDIAN
8394 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8395 { 12, 4, 13, 5, 14, 6, 15, 7 });
8396 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8397 { 8, 0, 9, 1, 10, 2, 11, 3 });
8398 #else
8399 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8400 { 0, 8, 1, 9, 2, 10, 3, 11 });
8401 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8402 { 4, 12, 5, 13, 6, 14, 7, 15 });
8403 #endif
8404 return __rv;
/* Unsigned 4-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8407 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
8408 vzip_u16 (uint16x4_t __a, uint16x4_t __b)
8410 uint16x4x2_t __rv;
8411 #ifdef __ARM_BIG_ENDIAN
8412 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
8413 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
8414 #else
8415 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
8416 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
8417 #endif
8418 return __rv;
/* Polynomial 8-lane interleave (zip) of __a and __b, returned as a pair; the
   index masks are uint8x8_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8421 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
8422 vzip_p8 (poly8x8_t __a, poly8x8_t __b)
8424 poly8x8x2_t __rv;
8425 #ifdef __ARM_BIG_ENDIAN
8426 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8427 { 12, 4, 13, 5, 14, 6, 15, 7 });
8428 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8429 { 8, 0, 9, 1, 10, 2, 11, 3 });
8430 #else
8431 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8432 { 0, 8, 1, 9, 2, 10, 3, 11 });
8433 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8434 { 4, 12, 5, 13, 6, 14, 7, 15 });
8435 #endif
8436 return __rv;
/* Polynomial 4-lane interleave (zip) of __a and __b, returned as a pair; the
   index masks are uint16x4_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8439 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
8440 vzip_p16 (poly16x4_t __a, poly16x4_t __b)
8442 poly16x4x2_t __rv;
8443 #ifdef __ARM_BIG_ENDIAN
8444 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
8445 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
8446 #else
8447 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
8448 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
8449 #endif
8450 return __rv;
/* Two-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8453 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
8454 vzip_s32 (int32x2_t __a, int32x2_t __b)
8456 int32x2x2_t __rv;
8457 #ifdef __ARM_BIG_ENDIAN
8458 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8459 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8460 #else
8461 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8462 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8463 #endif
8464 return __rv;
/* Two-lane float interleave (zip) of __a and __b, returned as a pair; the
   index masks are uint32x2_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8467 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
8468 vzip_f32 (float32x2_t __a, float32x2_t __b)
8470 float32x2x2_t __rv;
8471 #ifdef __ARM_BIG_ENDIAN
8472 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8473 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8474 #else
8475 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8476 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8477 #endif
8478 return __rv;
/* Unsigned two-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8481 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
8482 vzip_u32 (uint32x2_t __a, uint32x2_t __b)
8484 uint32x2x2_t __rv;
8485 #ifdef __ARM_BIG_ENDIAN
8486 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8487 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8488 #else
8489 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8490 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8491 #endif
8492 return __rv;
/* 16-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a1, b1, ..., a7, b7 }
     val[1] = { a8, b8, a9, b9, ..., a15, b15 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; their derivation is not visible here -- do not
   "simplify" them without checking the big-endian quad-register layout.  */
8495 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
8496 vzipq_s8 (int8x16_t __a, int8x16_t __b)
8498 int8x16x2_t __rv;
8499 #ifdef __ARM_BIG_ENDIAN
8500 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8501 { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
8502 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8503 { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
8504 #else
8505 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8506 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
8507 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8508 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
8509 #endif
8510 return __rv;
/* 8-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm against the big-endian quad-register layout
   before changing them.  */
8513 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
8514 vzipq_s16 (int16x8_t __a, int16x8_t __b)
8516 int16x8x2_t __rv;
8517 #ifdef __ARM_BIG_ENDIAN
8518 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8519 { 10, 2, 11, 3, 8, 0, 9, 1 });
8520 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8521 { 14, 6, 15, 7, 12, 4, 13, 5 });
8522 #else
8523 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8524 { 0, 8, 1, 9, 2, 10, 3, 11 });
8525 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8526 { 4, 12, 5, 13, 6, 14, 7, 15 });
8527 #endif
8528 return __rv;
/* 4-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8531 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
8532 vzipq_s32 (int32x4_t __a, int32x4_t __b)
8534 int32x4x2_t __rv;
8535 #ifdef __ARM_BIG_ENDIAN
8536 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
8537 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
8538 #else
8539 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
8540 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
8541 #endif
8542 return __rv;
/* 4-lane float interleave (zip) of __a and __b, returned as a pair; the index
   masks are uint32x4_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8545 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
8546 vzipq_f32 (float32x4_t __a, float32x4_t __b)
8548 float32x4x2_t __rv;
8549 #ifdef __ARM_BIG_ENDIAN
8550 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
8551 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
8552 #else
8553 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
8554 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
8555 #endif
8556 return __rv;
/* Unsigned 16-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a1, b1, ..., a7, b7 }
     val[1] = { a8, b8, a9, b9, ..., a15, b15 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8559 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
8560 vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
8562 uint8x16x2_t __rv;
8563 #ifdef __ARM_BIG_ENDIAN
8564 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8565 { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
8566 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8567 { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
8568 #else
8569 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8570 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
8571 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8572 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
8573 #endif
8574 return __rv;
/* Unsigned 8-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8577 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
8578 vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
8580 uint16x8x2_t __rv;
8581 #ifdef __ARM_BIG_ENDIAN
8582 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8583 { 10, 2, 11, 3, 8, 0, 9, 1 });
8584 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8585 { 14, 6, 15, 7, 12, 4, 13, 5 });
8586 #else
8587 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8588 { 0, 8, 1, 9, 2, 10, 3, 11 });
8589 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8590 { 4, 12, 5, 13, 6, 14, 7, 15 });
8591 #endif
8592 return __rv;
/* Unsigned 4-lane interleave (zip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, b0, a1, b1 }
     val[1] = { a2, b2, a3, b3 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8595 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
8596 vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
8598 uint32x4x2_t __rv;
8599 #ifdef __ARM_BIG_ENDIAN
8600 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
8601 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
8602 #else
8603 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
8604 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
8605 #endif
8606 return __rv;
/* Polynomial 16-lane interleave (zip) of __a and __b, returned as a pair; the
   index masks are uint8x16_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 16-31 select from __b):
     val[0] = { a0, b0, a1, b1, ..., a7, b7 }
     val[1] = { a8, b8, a9, b9, ..., a15, b15 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8609 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
8610 vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
8612 poly8x16x2_t __rv;
8613 #ifdef __ARM_BIG_ENDIAN
8614 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8615 { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
8616 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8617 { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
8618 #else
8619 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8620 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
8621 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8622 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
8623 #endif
8624 return __rv;
/* Polynomial 8-lane interleave (zip) of __a and __b, returned as a pair; the
   index masks are uint16x8_t vectors.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, b0, a1, b1, a2, b2, a3, b3 }
     val[1] = { a4, b4, a5, b5, a6, b6, a7, b7 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.
   NOTE(review): those big-endian masks are not a plain element reversal of
   the little-endian ones; confirm before changing them.  */
8627 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
8628 vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
8630 poly16x8x2_t __rv;
8631 #ifdef __ARM_BIG_ENDIAN
8632 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8633 { 10, 2, 11, 3, 8, 0, 9, 1 });
8634 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8635 { 14, 6, 15, 7, 12, 4, 13, 5 });
8636 #else
8637 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8638 { 0, 8, 1, 9, 2, 10, 3, 11 });
8639 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8640 { 4, 12, 5, 13, 6, 14, 7, 15 });
8641 #endif
8642 return __rv;
/* De-interleave (unzip) the lanes of __a and __b and return the result as a
   pair.  In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 8-15 select from __b):
     val[0] = { a0, a2, a4, a6, b0, b2, b4, b6 }  (even lanes)
     val[1] = { a1, a3, a5, a7, b1, b3, b5, b7 }  (odd lanes)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8645 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
8646 vuzp_s8 (int8x8_t __a, int8x8_t __b)
8648 int8x8x2_t __rv;
8649 #ifdef __ARM_BIG_ENDIAN
8650 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8651 { 9, 11, 13, 15, 1, 3, 5, 7 });
8652 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8653 { 8, 10, 12, 14, 0, 2, 4, 6 });
8654 #else
8655 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8656 { 0, 2, 4, 6, 8, 10, 12, 14 });
8657 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8658 { 1, 3, 5, 7, 9, 11, 13, 15 });
8659 #endif
8660 return __rv;
/* De-interleave (unzip) the 4 lanes of __a and __b and return the result as a
   pair.  In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 4-7 select from __b):
     val[0] = { a0, a2, b0, b2 }  (even lanes)
     val[1] = { a1, a3, b1, b3 }  (odd lanes)
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8663 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
8664 vuzp_s16 (int16x4_t __a, int16x4_t __b)
8666 int16x4x2_t __rv;
8667 #ifdef __ARM_BIG_ENDIAN
8668 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
8669 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
8670 #else
8671 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
8672 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
8673 #endif
8674 return __rv;
/* Two-lane de-interleave (unzip) of __a and __b, returned as a pair.
   In the #else (little-endian) branch, with aN/bN = lane N of __a/__b
   (mask indices 2-3 select from __b):
     val[0] = { a0, b0 }
     val[1] = { a1, b1 }
   When __ARM_BIG_ENDIAN is defined the first pair of masks is used instead.  */
8677 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
8678 vuzp_s32 (int32x2_t __a, int32x2_t __b)
8680 int32x2x2_t __rv;
8681 #ifdef __ARM_BIG_ENDIAN
8682 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8683 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8684 #else
8685 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8686 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8687 #endif
8688 return __rv;
8691 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
8692 vuzp_f32 (float32x2_t __a, float32x2_t __b)
8694 float32x2x2_t __rv;
8695 #ifdef __ARM_BIG_ENDIAN
8696 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8697 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8698 #else
8699 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8700 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8701 #endif
8702 return __rv;
8705 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
8706 vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
8708 uint8x8x2_t __rv;
8709 #ifdef __ARM_BIG_ENDIAN
8710 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8711 { 9, 11, 13, 15, 1, 3, 5, 7 });
8712 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8713 { 8, 10, 12, 14, 0, 2, 4, 6 });
8714 #else
8715 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8716 { 0, 2, 4, 6, 8, 10, 12, 14 });
8717 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8718 { 1, 3, 5, 7, 9, 11, 13, 15 });
8719 #endif
8720 return __rv;
8723 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
8724 vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
8726 uint16x4x2_t __rv;
8727 #ifdef __ARM_BIG_ENDIAN
8728 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
8729 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
8730 #else
8731 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
8732 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
8733 #endif
8734 return __rv;
8737 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
8738 vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
8740 uint32x2x2_t __rv;
8741 #ifdef __ARM_BIG_ENDIAN
8742 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
8743 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
8744 #else
8745 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
8746 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
8747 #endif
8748 return __rv;
8751 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
8752 vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
8754 poly8x8x2_t __rv;
8755 #ifdef __ARM_BIG_ENDIAN
8756 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8757 { 9, 11, 13, 15, 1, 3, 5, 7 });
8758 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8759 { 8, 10, 12, 14, 0, 2, 4, 6 });
8760 #else
8761 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
8762 { 0, 2, 4, 6, 8, 10, 12, 14 });
8763 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
8764 { 1, 3, 5, 7, 9, 11, 13, 15 });
8765 #endif
8766 return __rv;
8769 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
8770 vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
8772 poly16x4x2_t __rv;
8773 #ifdef __ARM_BIG_ENDIAN
8774 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
8775 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
8776 #else
8777 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
8778 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
8779 #endif
8780 return __rv;
8783 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
8784 vuzpq_s8 (int8x16_t __a, int8x16_t __b)
8786 int8x16x2_t __rv;
8787 #ifdef __ARM_BIG_ENDIAN
8788 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8789 { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
8790 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8791 { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
8792 #else
8793 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8794 { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
8795 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8796 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
8797 #endif
8798 return __rv;
8801 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
8802 vuzpq_s16 (int16x8_t __a, int16x8_t __b)
8804 int16x8x2_t __rv;
8805 #ifdef __ARM_BIG_ENDIAN
8806 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8807 { 5, 7, 1, 3, 13, 15, 9, 11 });
8808 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8809 { 4, 6, 0, 2, 12, 14, 8, 10 });
8810 #else
8811 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8812 { 0, 2, 4, 6, 8, 10, 12, 14 });
8813 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8814 { 1, 3, 5, 7, 9, 11, 13, 15 });
8815 #endif
8816 return __rv;
8819 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
8820 vuzpq_s32 (int32x4_t __a, int32x4_t __b)
8822 int32x4x2_t __rv;
8823 #ifdef __ARM_BIG_ENDIAN
8824 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
8825 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
8826 #else
8827 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
8828 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
8829 #endif
8830 return __rv;
8833 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
8834 vuzpq_f32 (float32x4_t __a, float32x4_t __b)
8836 float32x4x2_t __rv;
8837 #ifdef __ARM_BIG_ENDIAN
8838 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
8839 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
8840 #else
8841 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
8842 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
8843 #endif
8844 return __rv;
8847 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
8848 vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
8850 uint8x16x2_t __rv;
8851 #ifdef __ARM_BIG_ENDIAN
8852 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8853 { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
8854 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8855 { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
8856 #else
8857 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8858 { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
8859 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8860 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
8861 #endif
8862 return __rv;
8865 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
8866 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
8868 uint16x8x2_t __rv;
8869 #ifdef __ARM_BIG_ENDIAN
8870 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8871 { 5, 7, 1, 3, 13, 15, 9, 11 });
8872 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8873 { 4, 6, 0, 2, 12, 14, 8, 10 });
8874 #else
8875 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8876 { 0, 2, 4, 6, 8, 10, 12, 14 });
8877 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8878 { 1, 3, 5, 7, 9, 11, 13, 15 });
8879 #endif
8880 return __rv;
8883 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
8884 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
8886 uint32x4x2_t __rv;
8887 #ifdef __ARM_BIG_ENDIAN
8888 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
8889 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
8890 #else
8891 __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
8892 __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
8893 #endif
8894 return __rv;
8897 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
8898 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
8900 poly8x16x2_t __rv;
8901 #ifdef __ARM_BIG_ENDIAN
8902 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8903 { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
8904 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8905 { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
8906 #else
8907 __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
8908 { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
8909 __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
8910 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
8911 #endif
8912 return __rv;
8915 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
8916 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
8918 poly16x8x2_t __rv;
8919 #ifdef __ARM_BIG_ENDIAN
8920 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8921 { 5, 7, 1, 3, 13, 15, 9, 11 });
8922 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8923 { 4, 6, 0, 2, 12, 14, 8, 10 });
8924 #else
8925 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
8926 { 0, 2, 4, 6, 8, 10, 12, 14 });
8927 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
8928 { 1, 3, 5, 7, 9, 11, 13, 15 });
8929 #endif
8930 return __rv;
8933 #pragma GCC push_options
8934 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
8935 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
8936 vld1_p64 (const poly64_t * __a)
8938 return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
8941 #pragma GCC pop_options
8942 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8943 vld1_s8 (const int8_t * __a)
8945 return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
8948 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8949 vld1_s16 (const int16_t * __a)
8951 return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
8954 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8955 vld1_s32 (const int32_t * __a)
8957 return (int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a);
8960 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8961 vld1_s64 (const int64_t * __a)
8963 return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1_f16: return the result of __builtin_neon_vld1v4hf (__a); no
   pointer cast is applied.  Only defined when one of the __fp16 format
   macros above is defined.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_f16 (const float16_t * __a)
{
  return __builtin_neon_vld1v4hf (__a);
}
#endif
8974 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8975 vld1_f32 (const float32_t * __a)
8977 return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a);
8980 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8981 vld1_u8 (const uint8_t * __a)
8983 return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
8986 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8987 vld1_u16 (const uint16_t * __a)
8989 return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
8992 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8993 vld1_u32 (const uint32_t * __a)
8995 return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a);
8998 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8999 vld1_u64 (const uint64_t * __a)
9001 return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
9004 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9005 vld1_p8 (const poly8_t * __a)
9007 return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
9010 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9011 vld1_p16 (const poly16_t * __a)
9013 return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
9016 #pragma GCC push_options
9017 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9018 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
9019 vld1q_p64 (const poly64_t * __a)
9021 return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
9024 #pragma GCC pop_options
9025 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9026 vld1q_s8 (const int8_t * __a)
9028 return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
9031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9032 vld1q_s16 (const int16_t * __a)
9034 return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
9037 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9038 vld1q_s32 (const int32_t * __a)
9040 return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a);
9043 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9044 vld1q_s64 (const int64_t * __a)
9046 return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1q_f16: return the result of __builtin_neon_vld1v8hf (__a); no
   pointer cast is applied.  Only defined when one of the __fp16 format
   macros above is defined.  */
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_f16 (const float16_t * __a)
{
  return __builtin_neon_vld1v8hf (__a);
}
#endif
9057 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9058 vld1q_f32 (const float32_t * __a)
9060 return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a);
9063 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9064 vld1q_u8 (const uint8_t * __a)
9066 return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
9069 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9070 vld1q_u16 (const uint16_t * __a)
9072 return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
9075 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9076 vld1q_u32 (const uint32_t * __a)
9078 return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a);
9081 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9082 vld1q_u64 (const uint64_t * __a)
9084 return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
9087 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9088 vld1q_p8 (const poly8_t * __a)
9090 return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
9093 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9094 vld1q_p16 (const poly16_t * __a)
9096 return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
9099 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9100 vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c)
9102 return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c);
9105 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9106 vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c)
9108 return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c);
9111 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9112 vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c)
9114 return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1_lane_f16: read the float16_t at *__a and hand it, with __b and
   lane argument __c, to vset_lane_f16; no NEON load builtin is called
   here.  Only defined when one of the __fp16 format macros above is
   defined.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_lane_f16 (const float16_t * __a, float16x4_t __b, const int __c)
{
  return vset_lane_f16 (*__a, __b, __c);
}
#endif
9125 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9126 vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
9128 return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c);
9131 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9132 vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c)
9134 return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c);
9137 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9138 vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c)
9140 return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c);
9143 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9144 vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c)
9146 return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c);
9149 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9150 vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c)
9152 return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c);
9155 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9156 vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c)
9158 return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c);
9161 #pragma GCC push_options
9162 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9163 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
9164 vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c)
9166 return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c);
9169 #pragma GCC pop_options
9170 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9171 vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c)
9173 return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c);
9176 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9177 vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c)
9179 return (uint64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, (int64x1_t) __b, __c);
9182 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9183 vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c)
9185 return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c);
9188 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9189 vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c)
9191 return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c);
9194 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9195 vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c)
9197 return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1q_lane_f16: read the float16_t at *__a and hand it, with __b and
   lane argument __c, to vsetq_lane_f16; no NEON load builtin is called
   here.  Only defined when one of the __fp16 format macros above is
   defined.  */
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_lane_f16 (const float16_t * __a, float16x8_t __b, const int __c)
{
  return vsetq_lane_f16 (*__a, __b, __c);
}
#endif
9208 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9209 vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
9211 return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c);
9214 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9215 vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c)
9217 return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c);
9220 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9221 vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c)
9223 return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c);
9226 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9227 vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c)
9229 return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c);
9232 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9233 vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c)
9235 return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c);
9238 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9239 vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c)
9241 return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c);
9244 #pragma GCC push_options
9245 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9246 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
9247 vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c)
9249 return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c);
9252 #pragma GCC pop_options
9253 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9254 vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c)
9256 return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c);
9259 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9260 vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c)
9262 return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c);
9265 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9266 vld1_dup_s8 (const int8_t * __a)
9268 return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
9271 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9272 vld1_dup_s16 (const int16_t * __a)
9274 return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
9277 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9278 vld1_dup_s32 (const int32_t * __a)
9280 return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1_dup_f16: read *__a once and return a float16x4_t whose four
   elements all hold that value; written in plain C, no builtin is called.
   Only defined when one of the __fp16 format macros above is defined.  */
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vld1_dup_f16 (const float16_t * __a)
{
  float16_t __f = *__a;
  return (float16x4_t) { __f, __f, __f, __f };
}
#endif
9292 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9293 vld1_dup_f32 (const float32_t * __a)
9295 return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a);
9298 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9299 vld1_dup_u8 (const uint8_t * __a)
9301 return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
9304 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9305 vld1_dup_u16 (const uint16_t * __a)
9307 return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
9310 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9311 vld1_dup_u32 (const uint32_t * __a)
9313 return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a);
9316 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9317 vld1_dup_p8 (const poly8_t * __a)
9319 return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
9322 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9323 vld1_dup_p16 (const poly16_t * __a)
9325 return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
9328 #pragma GCC push_options
9329 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9330 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
9331 vld1_dup_p64 (const poly64_t * __a)
9333 return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
9336 #pragma GCC pop_options
9337 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9338 vld1_dup_s64 (const int64_t * __a)
9340 return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
9343 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9344 vld1_dup_u64 (const uint64_t * __a)
9346 return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
9349 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9350 vld1q_dup_s8 (const int8_t * __a)
9352 return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
9355 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9356 vld1q_dup_s16 (const int16_t * __a)
9358 return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
9361 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9362 vld1q_dup_s32 (const int32_t * __a)
9364 return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld1q_dup_f16: read *__a once and return a float16x8_t whose eight
   elements all hold that value; written in plain C, no builtin is called.
   Only defined when one of the __fp16 format macros above is defined.  */
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vld1q_dup_f16 (const float16_t * __a)
{
  float16_t __f = *__a;
  return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f };
}
#endif
9376 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9377 vld1q_dup_f32 (const float32_t * __a)
9379 return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a);
9382 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9383 vld1q_dup_u8 (const uint8_t * __a)
9385 return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
9388 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9389 vld1q_dup_u16 (const uint16_t * __a)
9391 return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
9394 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9395 vld1q_dup_u32 (const uint32_t * __a)
9397 return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a);
9400 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9401 vld1q_dup_p8 (const poly8_t * __a)
9403 return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
9406 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9407 vld1q_dup_p16 (const poly16_t * __a)
9409 return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
9412 #pragma GCC push_options
9413 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9414 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
9415 vld1q_dup_p64 (const poly64_t * __a)
9417 return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
9420 #pragma GCC pop_options
9421 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9422 vld1q_dup_s64 (const int64_t * __a)
9424 return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
9427 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9428 vld1q_dup_u64 (const uint64_t * __a)
9430 return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
9433 #pragma GCC push_options
9434 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9435 __extension__ static __inline void __attribute__ ((__always_inline__))
9436 vst1_p64 (poly64_t * __a, poly64x1_t __b)
9438 __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
9441 #pragma GCC pop_options
9442 __extension__ static __inline void __attribute__ ((__always_inline__))
9443 vst1_s8 (int8_t * __a, int8x8_t __b)
9445 __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b);
9448 __extension__ static __inline void __attribute__ ((__always_inline__))
9449 vst1_s16 (int16_t * __a, int16x4_t __b)
9451 __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b);
9454 __extension__ static __inline void __attribute__ ((__always_inline__))
9455 vst1_s32 (int32_t * __a, int32x2_t __b)
9457 __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b);
9460 __extension__ static __inline void __attribute__ ((__always_inline__))
9461 vst1_s64 (int64_t * __a, int64x1_t __b)
9463 __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst1_f16: hand pointer __a and vector __b, both uncast, to
   __builtin_neon_vst1v4hf; nothing is returned.  Only defined when one
   of the __fp16 format macros above is defined.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f16 (float16_t * __a, float16x4_t __b)
{
  __builtin_neon_vst1v4hf (__a, __b);
}
#endif
9474 __extension__ static __inline void __attribute__ ((__always_inline__))
9475 vst1_f32 (float32_t * __a, float32x2_t __b)
9477 __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
9480 __extension__ static __inline void __attribute__ ((__always_inline__))
9481 vst1_u8 (uint8_t * __a, uint8x8_t __b)
9483 __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b);
9486 __extension__ static __inline void __attribute__ ((__always_inline__))
9487 vst1_u16 (uint16_t * __a, uint16x4_t __b)
9489 __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b);
9492 __extension__ static __inline void __attribute__ ((__always_inline__))
9493 vst1_u32 (uint32_t * __a, uint32x2_t __b)
9495 __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b);
9498 __extension__ static __inline void __attribute__ ((__always_inline__))
9499 vst1_u64 (uint64_t * __a, uint64x1_t __b)
9501 __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b);
9504 __extension__ static __inline void __attribute__ ((__always_inline__))
9505 vst1_p8 (poly8_t * __a, poly8x8_t __b)
9507 __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b);
9510 __extension__ static __inline void __attribute__ ((__always_inline__))
9511 vst1_p16 (poly16_t * __a, poly16x4_t __b)
9513 __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b);
9516 #pragma GCC push_options
9517 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9518 __extension__ static __inline void __attribute__ ((__always_inline__))
9519 vst1q_p64 (poly64_t * __a, poly64x2_t __b)
9521 __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b);
9524 #pragma GCC pop_options
9525 __extension__ static __inline void __attribute__ ((__always_inline__))
9526 vst1q_s8 (int8_t * __a, int8x16_t __b)
9528 __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b);
9531 __extension__ static __inline void __attribute__ ((__always_inline__))
9532 vst1q_s16 (int16_t * __a, int16x8_t __b)
9534 __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, __b);
9537 __extension__ static __inline void __attribute__ ((__always_inline__))
9538 vst1q_s32 (int32_t * __a, int32x4_t __b)
9540 __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b);
9543 __extension__ static __inline void __attribute__ ((__always_inline__))
9544 vst1q_s64 (int64_t * __a, int64x2_t __b)
9546 __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst1q_f16: hand pointer __a and vector __b, both uncast, to
   __builtin_neon_vst1v8hf; nothing is returned.  Only defined when one
   of the __fp16 format macros above is defined.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f16 (float16_t * __a, float16x8_t __b)
{
  __builtin_neon_vst1v8hf (__a, __b);
}
#endif
9557 __extension__ static __inline void __attribute__ ((__always_inline__))
9558 vst1q_f32 (float32_t * __a, float32x4_t __b)
9560 __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
9563 __extension__ static __inline void __attribute__ ((__always_inline__))
9564 vst1q_u8 (uint8_t * __a, uint8x16_t __b)
9566 __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b);
9569 __extension__ static __inline void __attribute__ ((__always_inline__))
9570 vst1q_u16 (uint16_t * __a, uint16x8_t __b)
9572 __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b);
9575 __extension__ static __inline void __attribute__ ((__always_inline__))
9576 vst1q_u32 (uint32_t * __a, uint32x4_t __b)
9578 __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b);
9581 __extension__ static __inline void __attribute__ ((__always_inline__))
9582 vst1q_u64 (uint64_t * __a, uint64x2_t __b)
9584 __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b);
9587 __extension__ static __inline void __attribute__ ((__always_inline__))
9588 vst1q_p8 (poly8_t * __a, poly8x16_t __b)
9590 __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b);
9593 __extension__ static __inline void __attribute__ ((__always_inline__))
9594 vst1q_p16 (poly16_t * __a, poly16x8_t __b)
9596 __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b);
9599 __extension__ static __inline void __attribute__ ((__always_inline__))
9600 vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c)
9602 __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c);
9605 __extension__ static __inline void __attribute__ ((__always_inline__))
9606 vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c)
9608 __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c);
9611 __extension__ static __inline void __attribute__ ((__always_inline__))
9612 vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
9614 __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Hands __a,
   __b and __c to __builtin_neon_vst1_lanev4hf without any cast.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c)
{
  __builtin_neon_vst1_lanev4hf (__a, __b, __c);
}
#endif
9625 __extension__ static __inline void __attribute__ ((__always_inline__))
9626 vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
9628 __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
9631 __extension__ static __inline void __attribute__ ((__always_inline__))
9632 vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c)
9634 __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c);
9637 __extension__ static __inline void __attribute__ ((__always_inline__))
9638 vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c)
9640 __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c);
9643 __extension__ static __inline void __attribute__ ((__always_inline__))
9644 vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c)
9646 __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c);
9649 __extension__ static __inline void __attribute__ ((__always_inline__))
9650 vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c)
9652 __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c);
9655 __extension__ static __inline void __attribute__ ((__always_inline__))
9656 vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c)
9658 __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c);
9661 #pragma GCC push_options
9662 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9663 __extension__ static __inline void __attribute__ ((__always_inline__))
9664 vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c)
9666 __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c);
9669 #pragma GCC pop_options
9670 __extension__ static __inline void __attribute__ ((__always_inline__))
9671 vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c)
9673 __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c);
9676 __extension__ static __inline void __attribute__ ((__always_inline__))
9677 vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c)
9679 __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c);
9682 __extension__ static __inline void __attribute__ ((__always_inline__))
9683 vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c)
9685 __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c);
9688 __extension__ static __inline void __attribute__ ((__always_inline__))
9689 vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c)
9691 __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c);
9694 __extension__ static __inline void __attribute__ ((__always_inline__))
9695 vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
9697 __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Hands __a,
   __b and __c to __builtin_neon_vst1_lanev8hf without any cast.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c)
{
  __builtin_neon_vst1_lanev8hf (__a, __b, __c);
}
#endif
9708 __extension__ static __inline void __attribute__ ((__always_inline__))
9709 vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
9711 __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
9714 __extension__ static __inline void __attribute__ ((__always_inline__))
9715 vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c)
9717 __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c);
9720 __extension__ static __inline void __attribute__ ((__always_inline__))
9721 vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c)
9723 __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c);
9726 __extension__ static __inline void __attribute__ ((__always_inline__))
9727 vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c)
9729 __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c);
9732 __extension__ static __inline void __attribute__ ((__always_inline__))
9733 vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c)
9735 __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c);
9738 __extension__ static __inline void __attribute__ ((__always_inline__))
9739 vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c)
9741 __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c);
9744 #pragma GCC push_options
9745 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9746 __extension__ static __inline void __attribute__ ((__always_inline__))
9747 vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c)
9749 __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c);
9752 #pragma GCC pop_options
9753 __extension__ static __inline void __attribute__ ((__always_inline__))
9754 vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c)
9756 __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c);
9759 __extension__ static __inline void __attribute__ ((__always_inline__))
9760 vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c)
9762 __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c);
9765 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
9766 vld2_s8 (const int8_t * __a)
9768 union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
9769 __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
9770 return __rv.__i;
9773 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
9774 vld2_s16 (const int16_t * __a)
9776 union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
9777 __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
9778 return __rv.__i;
9781 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
9782 vld2_s32 (const int32_t * __a)
9784 union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
9785 __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a);
9786 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Calls
   __builtin_neon_vld2v4hf on __a (no cast) and returns its
   __builtin_neon_ti result viewed as float16x4x2_t through a union.  */
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_f16 (const float16_t * __a)
{
  union { float16x4x2_t __vec; __builtin_neon_ti __raw; } __res;
  __res.__raw = __builtin_neon_vld2v4hf (__a);
  return __res.__vec;
}
#endif
9799 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
9800 vld2_f32 (const float32_t * __a)
9802 union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
9803 __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a);
9804 return __rv.__i;
9807 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
9808 vld2_u8 (const uint8_t * __a)
9810 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
9811 __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
9812 return __rv.__i;
9815 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
9816 vld2_u16 (const uint16_t * __a)
9818 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
9819 __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
9820 return __rv.__i;
9823 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
9824 vld2_u32 (const uint32_t * __a)
9826 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
9827 __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a);
9828 return __rv.__i;
9831 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
9832 vld2_p8 (const poly8_t * __a)
9834 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
9835 __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
9836 return __rv.__i;
9839 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
9840 vld2_p16 (const poly16_t * __a)
9842 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
9843 __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
9844 return __rv.__i;
9847 #pragma GCC push_options
9848 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
9849 __extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
9850 vld2_p64 (const poly64_t * __a)
9852 union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv;
9853 __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a);
9854 return __rv.__i;
9857 #pragma GCC pop_options
9858 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
9859 vld2_s64 (const int64_t * __a)
9861 union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
9862 __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a);
9863 return __rv.__i;
9866 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
9867 vld2_u64 (const uint64_t * __a)
9869 union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
9870 __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a);
9871 return __rv.__i;
9874 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
9875 vld2q_s8 (const int8_t * __a)
9877 union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
9878 __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
9879 return __rv.__i;
9882 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
9883 vld2q_s16 (const int16_t * __a)
9885 union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
9886 __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
9887 return __rv.__i;
9890 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
9891 vld2q_s32 (const int32_t * __a)
9893 union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
9894 __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a);
9895 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Calls
   __builtin_neon_vld2v8hf on __a (no cast) and returns its
   __builtin_neon_oi result viewed as float16x8x2_t through a union.  */
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_f16 (const float16_t * __a)
{
  union { float16x8x2_t __vec; __builtin_neon_oi __raw; } __res;
  __res.__raw = __builtin_neon_vld2v8hf (__a);
  return __res.__vec;
}
#endif
9908 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
9909 vld2q_f32 (const float32_t * __a)
9911 union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
9912 __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a);
9913 return __rv.__i;
9916 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
9917 vld2q_u8 (const uint8_t * __a)
9919 union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
9920 __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
9921 return __rv.__i;
9924 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
9925 vld2q_u16 (const uint16_t * __a)
9927 union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
9928 __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
9929 return __rv.__i;
9932 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
9933 vld2q_u32 (const uint32_t * __a)
9935 union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
9936 __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a);
9937 return __rv.__i;
9940 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
9941 vld2q_p8 (const poly8_t * __a)
9943 union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
9944 __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
9945 return __rv.__i;
9948 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
9949 vld2q_p16 (const poly16_t * __a)
9951 union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
9952 __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
9953 return __rv.__i;
9956 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
9957 vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c)
9959 union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
9960 union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
9961 __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
9962 return __rv.__i;
9965 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
9966 vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c)
9968 union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
9969 union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
9970 __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
9971 return __rv.__i;
9974 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
9975 vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
9977 union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
9978 union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
9979 __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
9980 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Passes __a
   (no cast), the __builtin_neon_ti view of __b and __c to
   __builtin_neon_vld2_lanev4hf; the result is returned as
   float16x4x2_t through a union.  */
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
{
  union { float16x4x2_t __vec; __builtin_neon_ti __raw; } __in = { __b }, __out;
  __out.__raw = __builtin_neon_vld2_lanev4hf (__a, __in.__raw, __c);
  return __out.__vec;
}
#endif
9994 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
9995 vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
9997 union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
9998 union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
9999 __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
10000 return __rv.__i;
10003 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
10004 vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c)
10006 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10007 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
10008 __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
10009 return __rv.__i;
10012 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
10013 vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c)
10015 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10016 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
10017 __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10018 return __rv.__i;
10021 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
10022 vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c)
10024 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10025 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
10026 __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10027 return __rv.__i;
10030 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
10031 vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c)
10033 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10034 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
10035 __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
10036 return __rv.__i;
10039 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
10040 vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c)
10042 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10043 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
10044 __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10045 return __rv.__i;
10048 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
10049 vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c)
10051 union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10052 union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
10053 __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10054 return __rv.__i;
10057 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
10058 vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
10060 union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10061 union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
10062 __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10063 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Passes __a
   (no cast), the __builtin_neon_oi view of __b and __c to
   __builtin_neon_vld2_lanev8hf; the result is returned as
   float16x8x2_t through a union.  */
__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
{
  union { float16x8x2_t __vec; __builtin_neon_oi __raw; } __in = { __b }, __out;
  __out.__raw = __builtin_neon_vld2_lanev8hf (__a, __in.__raw, __c);
  return __out.__vec;
}
#endif
10077 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
10078 vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
10080 union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10081 union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
10082 __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
10083 return __rv.__i;
10086 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
10087 vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c)
10089 union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10090 union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
10091 __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10092 return __rv.__i;
10095 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
10096 vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c)
10098 union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10099 union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
10100 __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10101 return __rv.__i;
10104 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
10105 vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c)
10107 union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10108 union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
10109 __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10110 return __rv.__i;
10113 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
10114 vld2_dup_s8 (const int8_t * __a)
10116 union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
10117 __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
10118 return __rv.__i;
10121 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
10122 vld2_dup_s16 (const int16_t * __a)
10124 union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
10125 __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
10126 return __rv.__i;
10129 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
10130 vld2_dup_s32 (const int32_t * __a)
10132 union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
10133 __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a);
10134 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Calls
   __builtin_neon_vld2_dupv4hf on __a (no cast) and returns its
   __builtin_neon_ti result viewed as float16x4x2_t through a union.  */
__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
vld2_dup_f16 (const float16_t * __a)
{
  union { float16x4x2_t __vec; __builtin_neon_ti __raw; } __res;
  __res.__raw = __builtin_neon_vld2_dupv4hf (__a);
  return __res.__vec;
}
#endif
10147 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
10148 vld2_dup_f32 (const float32_t * __a)
10150 union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
10151 __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a);
10152 return __rv.__i;
10155 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
10156 vld2_dup_u8 (const uint8_t * __a)
10158 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
10159 __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
10160 return __rv.__i;
10163 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
10164 vld2_dup_u16 (const uint16_t * __a)
10166 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
10167 __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
10168 return __rv.__i;
10171 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
10172 vld2_dup_u32 (const uint32_t * __a)
10174 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
10175 __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a);
10176 return __rv.__i;
10179 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
10180 vld2_dup_p8 (const poly8_t * __a)
10182 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
10183 __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
10184 return __rv.__i;
10187 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
10188 vld2_dup_p16 (const poly16_t * __a)
10190 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
10191 __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
10192 return __rv.__i;
10195 #pragma GCC push_options
10196 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
10197 __extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
10198 vld2_dup_p64 (const poly64_t * __a)
10200 union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv;
10201 __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a);
10202 return __rv.__i;
10205 #pragma GCC pop_options
10206 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
10207 vld2_dup_s64 (const int64_t * __a)
10209 union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
10210 __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a);
10211 return __rv.__i;
10214 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
10215 vld2_dup_u64 (const uint64_t * __a)
10217 union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
10218 __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a);
10219 return __rv.__i;
10222 __extension__ static __inline void __attribute__ ((__always_inline__))
10223 vst2_s8 (int8_t * __a, int8x8x2_t __b)
10225 union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10226 __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
10229 __extension__ static __inline void __attribute__ ((__always_inline__))
10230 vst2_s16 (int16_t * __a, int16x4x2_t __b)
10232 union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10233 __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
10236 __extension__ static __inline void __attribute__ ((__always_inline__))
10237 vst2_s32 (int32_t * __a, int32x2x2_t __b)
10239 union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10240 __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Views __b as
   __builtin_neon_ti through a union and passes it, with __a uncast, to
   __builtin_neon_vst2v4hf.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_f16 (float16_t * __a, float16x4x2_t __b)
{
  union { float16x4x2_t __vec; __builtin_neon_ti __raw; } __arg = { __b };
  __builtin_neon_vst2v4hf (__a, __arg.__raw);
}
#endif
10252 __extension__ static __inline void __attribute__ ((__always_inline__))
10253 vst2_f32 (float32_t * __a, float32x2x2_t __b)
10255 union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10256 __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
10259 __extension__ static __inline void __attribute__ ((__always_inline__))
10260 vst2_u8 (uint8_t * __a, uint8x8x2_t __b)
10262 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10263 __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
10266 __extension__ static __inline void __attribute__ ((__always_inline__))
10267 vst2_u16 (uint16_t * __a, uint16x4x2_t __b)
10269 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10270 __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
10273 __extension__ static __inline void __attribute__ ((__always_inline__))
10274 vst2_u32 (uint32_t * __a, uint32x2x2_t __b)
10276 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10277 __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
10280 __extension__ static __inline void __attribute__ ((__always_inline__))
10281 vst2_p8 (poly8_t * __a, poly8x8x2_t __b)
10283 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10284 __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
10287 __extension__ static __inline void __attribute__ ((__always_inline__))
10288 vst2_p16 (poly16_t * __a, poly16x4x2_t __b)
10290 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10291 __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
10294 #pragma GCC push_options
10295 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
10296 __extension__ static __inline void __attribute__ ((__always_inline__))
10297 vst2_p64 (poly64_t * __a, poly64x1x2_t __b)
10299 union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10300 __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
10303 #pragma GCC pop_options
10304 __extension__ static __inline void __attribute__ ((__always_inline__))
10305 vst2_s64 (int64_t * __a, int64x1x2_t __b)
10307 union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10308 __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
10311 __extension__ static __inline void __attribute__ ((__always_inline__))
10312 vst2_u64 (uint64_t * __a, uint64x1x2_t __b)
10314 union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10315 __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
10318 __extension__ static __inline void __attribute__ ((__always_inline__))
10319 vst2q_s8 (int8_t * __a, int8x16x2_t __b)
10321 union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10322 __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
10325 __extension__ static __inline void __attribute__ ((__always_inline__))
10326 vst2q_s16 (int16_t * __a, int16x8x2_t __b)
10328 union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10329 __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
10332 __extension__ static __inline void __attribute__ ((__always_inline__))
10333 vst2q_s32 (int32_t * __a, int32x4x2_t __b)
10335 union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10336 __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Only defined when an __ARM_FP16_FORMAT_* macro is set.  Views __b as
   __builtin_neon_oi through a union and passes it, with __a uncast, to
   __builtin_neon_vst2v8hf.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_f16 (float16_t * __a, float16x8x2_t __b)
{
  union { float16x8x2_t __vec; __builtin_neon_oi __raw; } __arg = { __b };
  __builtin_neon_vst2v8hf (__a, __arg.__raw);
}
#endif
10348 __extension__ static __inline void __attribute__ ((__always_inline__))
10349 vst2q_f32 (float32_t * __a, float32x4x2_t __b)
10351 union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10352 __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
10355 __extension__ static __inline void __attribute__ ((__always_inline__))
10356 vst2q_u8 (uint8_t * __a, uint8x16x2_t __b)
10358 union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10359 __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
10362 __extension__ static __inline void __attribute__ ((__always_inline__))
10363 vst2q_u16 (uint16_t * __a, uint16x8x2_t __b)
10365 union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10366 __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
10369 __extension__ static __inline void __attribute__ ((__always_inline__))
10370 vst2q_u32 (uint32_t * __a, uint32x4x2_t __b)
10372 union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10373 __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
10376 __extension__ static __inline void __attribute__ ((__always_inline__))
10377 vst2q_p8 (poly8_t * __a, poly8x16x2_t __b)
10379 union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10380 __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
10383 __extension__ static __inline void __attribute__ ((__always_inline__))
10384 vst2q_p16 (poly16_t * __a, poly16x8x2_t __b)
10386 union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10387 __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
10390 __extension__ static __inline void __attribute__ ((__always_inline__))
10391 vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c)
10393 union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10394 __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
10397 __extension__ static __inline void __attribute__ ((__always_inline__))
10398 vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c)
10400 union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10401 __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10404 __extension__ static __inline void __attribute__ ((__always_inline__))
10405 vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
10407 union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10408 __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst2_lane_f16: forwards __a (uncast), the bits of __b and __c to
   __builtin_neon_vst2_lanev4hf.  Only defined when an __fp16 format macro
   is set.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
{
  union { float16x4x2_t __i; __builtin_neon_ti __o; } __val;

  __val.__i = __b;
  __builtin_neon_vst2_lanev4hf (__a, __val.__o, __c);
}
#endif
10420 __extension__ static __inline void __attribute__ ((__always_inline__))
10421 vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
10423 union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10424 __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
10427 __extension__ static __inline void __attribute__ ((__always_inline__))
10428 vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c)
10430 union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10431 __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
10434 __extension__ static __inline void __attribute__ ((__always_inline__))
10435 vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c)
10437 union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10438 __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10441 __extension__ static __inline void __attribute__ ((__always_inline__))
10442 vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c)
10444 union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10445 __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
10448 __extension__ static __inline void __attribute__ ((__always_inline__))
10449 vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c)
10451 union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10452 __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
10455 __extension__ static __inline void __attribute__ ((__always_inline__))
10456 vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c)
10458 union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
10459 __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10462 __extension__ static __inline void __attribute__ ((__always_inline__))
10463 vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c)
10465 union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10466 __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10469 __extension__ static __inline void __attribute__ ((__always_inline__))
10470 vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
10472 union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10473 __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst2q_lane_f16: forwards __a (uncast), the bits of __b and __c to
   __builtin_neon_vst2_lanev8hf.  Only defined when an __fp16 format macro
   is set.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
{
  union { float16x8x2_t __i; __builtin_neon_oi __o; } __val;

  __val.__i = __b;
  __builtin_neon_vst2_lanev8hf (__a, __val.__o, __c);
}
#endif
10485 __extension__ static __inline void __attribute__ ((__always_inline__))
10486 vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
10488 union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10489 __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
10492 __extension__ static __inline void __attribute__ ((__always_inline__))
10493 vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c)
10495 union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10496 __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10499 __extension__ static __inline void __attribute__ ((__always_inline__))
10500 vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c)
10502 union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10503 __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
10506 __extension__ static __inline void __attribute__ ((__always_inline__))
10507 vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c)
10509 union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
10510 __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
10513 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
10514 vld3_s8 (const int8_t * __a)
10516 union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10517 __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
10518 return __rv.__i;
10521 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
10522 vld3_s16 (const int16_t * __a)
10524 union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10525 __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
10526 return __rv.__i;
10529 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
10530 vld3_s32 (const int32_t * __a)
10532 union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10533 __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a);
10534 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld3_f16: returns the result of __builtin_neon_vld3v4hf on __a (uncast),
   viewed as float16x4x3_t.  Only defined when an __fp16 format macro is
   set.  */
__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_f16 (const float16_t * __a)
{
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __res;

  __res.__o = __builtin_neon_vld3v4hf (__a);
  return __res.__i;
}
#endif
10547 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
10548 vld3_f32 (const float32_t * __a)
10550 union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10551 __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a);
10552 return __rv.__i;
10555 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
10556 vld3_u8 (const uint8_t * __a)
10558 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10559 __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
10560 return __rv.__i;
10563 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
10564 vld3_u16 (const uint16_t * __a)
10566 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10567 __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
10568 return __rv.__i;
10571 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
10572 vld3_u32 (const uint32_t * __a)
10574 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10575 __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a);
10576 return __rv.__i;
10579 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
10580 vld3_p8 (const poly8_t * __a)
10582 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10583 __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
10584 return __rv.__i;
10587 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
10588 vld3_p16 (const poly16_t * __a)
10590 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10591 __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
10592 return __rv.__i;
10595 #pragma GCC push_options
10596 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
10597 __extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
10598 vld3_p64 (const poly64_t * __a)
10600 union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10601 __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a);
10602 return __rv.__i;
10605 #pragma GCC pop_options
10606 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
10607 vld3_s64 (const int64_t * __a)
10609 union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10610 __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a);
10611 return __rv.__i;
10614 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
10615 vld3_u64 (const uint64_t * __a)
10617 union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10618 __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a);
10619 return __rv.__i;
10622 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
10623 vld3q_s8 (const int8_t * __a)
10625 union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv;
10626 __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
10627 return __rv.__i;
10630 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
10631 vld3q_s16 (const int16_t * __a)
10633 union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10634 __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
10635 return __rv.__i;
10638 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
10639 vld3q_s32 (const int32_t * __a)
10641 union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10642 __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a);
10643 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld3q_f16: returns the result of __builtin_neon_vld3v8hf on __a (uncast),
   viewed as float16x8x3_t.  Only defined when an __fp16 format macro is
   set.  */
__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
vld3q_f16 (const float16_t * __a)
{
  union { float16x8x3_t __i; __builtin_neon_ci __o; } __res;

  __res.__o = __builtin_neon_vld3v8hf (__a);
  return __res.__i;
}
#endif
10656 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
10657 vld3q_f32 (const float32_t * __a)
10659 union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10660 __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a);
10661 return __rv.__i;
10664 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
10665 vld3q_u8 (const uint8_t * __a)
10667 union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv;
10668 __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
10669 return __rv.__i;
10672 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
10673 vld3q_u16 (const uint16_t * __a)
10675 union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10676 __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
10677 return __rv.__i;
10680 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
10681 vld3q_u32 (const uint32_t * __a)
10683 union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10684 __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a);
10685 return __rv.__i;
10688 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
10689 vld3q_p8 (const poly8_t * __a)
10691 union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv;
10692 __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
10693 return __rv.__i;
10696 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
10697 vld3q_p16 (const poly16_t * __a)
10699 union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10700 __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
10701 return __rv.__i;
10704 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
10705 vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c)
10707 union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10708 union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10709 __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
10710 return __rv.__i;
10713 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
10714 vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c)
10716 union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10717 union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10718 __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10719 return __rv.__i;
10722 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
10723 vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
10725 union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10726 union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10727 __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10728 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld3_lane_f16: calls __builtin_neon_vld3_lanev4hf with __a (uncast), the
   bits of __b and __c, and returns the result viewed as float16x4x3_t.
   Only defined when an __fp16 format macro is set.  */
__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
{
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __val;
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __res;

  __val.__i = __b;
  __res.__o = __builtin_neon_vld3_lanev4hf (__a, __val.__o, __c);
  return __res.__i;
}
#endif
10742 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
10743 vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
10745 union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10746 union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10747 __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
10748 return __rv.__i;
10751 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
10752 vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c)
10754 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10755 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10756 __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
10757 return __rv.__i;
10760 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
10761 vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c)
10763 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10764 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10765 __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10766 return __rv.__i;
10769 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
10770 vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c)
10772 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10773 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10774 __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10775 return __rv.__i;
10778 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
10779 vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c)
10781 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10782 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10783 __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
10784 return __rv.__i;
10787 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
10788 vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c)
10790 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10791 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10792 __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10793 return __rv.__i;
10796 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
10797 vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c)
10799 union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10800 union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10801 __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10802 return __rv.__i;
10805 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
10806 vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
10808 union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10809 union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10810 __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10811 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld3q_lane_f16: calls __builtin_neon_vld3_lanev8hf with __a (uncast), the
   bits of __b and __c, and returns the result viewed as float16x8x3_t.
   Only defined when an __fp16 format macro is set.  */
__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
{
  union { float16x8x3_t __i; __builtin_neon_ci __o; } __val;
  union { float16x8x3_t __i; __builtin_neon_ci __o; } __res;

  __val.__i = __b;
  __res.__o = __builtin_neon_vld3_lanev8hf (__a, __val.__o, __c);
  return __res.__i;
}
#endif
10825 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
10826 vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
10828 union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10829 union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10830 __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
10831 return __rv.__i;
10834 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
10835 vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c)
10837 union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10838 union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10839 __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10840 return __rv.__i;
10843 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
10844 vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c)
10846 union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10847 union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv;
10848 __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
10849 return __rv.__i;
10852 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
10853 vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c)
10855 union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
10856 union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv;
10857 __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
10858 return __rv.__i;
10861 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
10862 vld3_dup_s8 (const int8_t * __a)
10864 union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10865 __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
10866 return __rv.__i;
10869 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
10870 vld3_dup_s16 (const int16_t * __a)
10872 union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10873 __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
10874 return __rv.__i;
10877 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
10878 vld3_dup_s32 (const int32_t * __a)
10880 union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10881 __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a);
10882 return __rv.__i;
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vld3_dup_f16: returns the result of __builtin_neon_vld3_dupv4hf on __a
   (uncast), viewed as float16x4x3_t.  Only defined when an __fp16 format
   macro is set.  */
__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
vld3_dup_f16 (const float16_t * __a)
{
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __res;

  __res.__o = __builtin_neon_vld3_dupv4hf (__a);
  return __res.__i;
}
#endif
10895 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
10896 vld3_dup_f32 (const float32_t * __a)
10898 union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10899 __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a);
10900 return __rv.__i;
10903 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
10904 vld3_dup_u8 (const uint8_t * __a)
10906 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10907 __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
10908 return __rv.__i;
10911 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
10912 vld3_dup_u16 (const uint16_t * __a)
10914 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10915 __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
10916 return __rv.__i;
10919 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
10920 vld3_dup_u32 (const uint32_t * __a)
10922 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
10923 __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a);
10924 return __rv.__i;
10927 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
10928 vld3_dup_p8 (const poly8_t * __a)
10930 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
10931 __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
10932 return __rv.__i;
10935 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
10936 vld3_dup_p16 (const poly16_t * __a)
10938 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
10939 __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
10940 return __rv.__i;
10943 #pragma GCC push_options
10944 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
10945 __extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
10946 vld3_dup_p64 (const poly64_t * __a)
10948 union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10949 __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a);
10950 return __rv.__i;
10953 #pragma GCC pop_options
10954 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
10955 vld3_dup_s64 (const int64_t * __a)
10957 union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10958 __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a);
10959 return __rv.__i;
10962 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
10963 vld3_dup_u64 (const uint64_t * __a)
10965 union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
10966 __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a);
10967 return __rv.__i;
10970 __extension__ static __inline void __attribute__ ((__always_inline__))
10971 vst3_s8 (int8_t * __a, int8x8x3_t __b)
10973 union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10974 __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
10977 __extension__ static __inline void __attribute__ ((__always_inline__))
10978 vst3_s16 (int16_t * __a, int16x4x3_t __b)
10980 union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10981 __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
10984 __extension__ static __inline void __attribute__ ((__always_inline__))
10985 vst3_s32 (int32_t * __a, int32x2x3_t __b)
10987 union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
10988 __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst3_f16: forwards __a (uncast) and the bits of __b to
   __builtin_neon_vst3v4hf.  Only defined when an __fp16 format macro is
   set.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_f16 (float16_t * __a, float16x4x3_t __b)
{
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __val;

  __val.__i = __b;
  __builtin_neon_vst3v4hf (__a, __val.__o);
}
#endif
11000 __extension__ static __inline void __attribute__ ((__always_inline__))
11001 vst3_f32 (float32_t * __a, float32x2x3_t __b)
11003 union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11004 __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
11007 __extension__ static __inline void __attribute__ ((__always_inline__))
11008 vst3_u8 (uint8_t * __a, uint8x8x3_t __b)
11010 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11011 __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
11014 __extension__ static __inline void __attribute__ ((__always_inline__))
11015 vst3_u16 (uint16_t * __a, uint16x4x3_t __b)
11017 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11018 __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
11021 __extension__ static __inline void __attribute__ ((__always_inline__))
11022 vst3_u32 (uint32_t * __a, uint32x2x3_t __b)
11024 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11025 __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
11028 __extension__ static __inline void __attribute__ ((__always_inline__))
11029 vst3_p8 (poly8_t * __a, poly8x8x3_t __b)
11031 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11032 __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
11035 __extension__ static __inline void __attribute__ ((__always_inline__))
11036 vst3_p16 (poly16_t * __a, poly16x4x3_t __b)
11038 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11039 __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
11042 #pragma GCC push_options
11043 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
11044 __extension__ static __inline void __attribute__ ((__always_inline__))
11045 vst3_p64 (poly64_t * __a, poly64x1x3_t __b)
11047 union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11048 __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
11051 #pragma GCC pop_options
11052 __extension__ static __inline void __attribute__ ((__always_inline__))
11053 vst3_s64 (int64_t * __a, int64x1x3_t __b)
11055 union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11056 __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
11059 __extension__ static __inline void __attribute__ ((__always_inline__))
11060 vst3_u64 (uint64_t * __a, uint64x1x3_t __b)
11062 union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11063 __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
11066 __extension__ static __inline void __attribute__ ((__always_inline__))
11067 vst3q_s8 (int8_t * __a, int8x16x3_t __b)
11069 union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11070 __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11073 __extension__ static __inline void __attribute__ ((__always_inline__))
11074 vst3q_s16 (int16_t * __a, int16x8x3_t __b)
11076 union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11077 __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11080 __extension__ static __inline void __attribute__ ((__always_inline__))
11081 vst3q_s32 (int32_t * __a, int32x4x3_t __b)
11083 union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11084 __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst3q_f16: forwards __a (uncast) and the bits of __b to
   __builtin_neon_vst3v8hf.  Only defined when an __fp16 format macro is
   set.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3q_f16 (float16_t * __a, float16x8x3_t __b)
{
  union { float16x8x3_t __i; __builtin_neon_ci __o; } __val;

  __val.__i = __b;
  __builtin_neon_vst3v8hf (__a, __val.__o);
}
#endif
11096 __extension__ static __inline void __attribute__ ((__always_inline__))
11097 vst3q_f32 (float32_t * __a, float32x4x3_t __b)
11099 union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11100 __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
11103 __extension__ static __inline void __attribute__ ((__always_inline__))
11104 vst3q_u8 (uint8_t * __a, uint8x16x3_t __b)
11106 union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11107 __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11110 __extension__ static __inline void __attribute__ ((__always_inline__))
11111 vst3q_u16 (uint16_t * __a, uint16x8x3_t __b)
11113 union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11114 __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11117 __extension__ static __inline void __attribute__ ((__always_inline__))
11118 vst3q_u32 (uint32_t * __a, uint32x4x3_t __b)
11120 union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11121 __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
11124 __extension__ static __inline void __attribute__ ((__always_inline__))
11125 vst3q_p8 (poly8_t * __a, poly8x16x3_t __b)
11127 union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11128 __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11131 __extension__ static __inline void __attribute__ ((__always_inline__))
11132 vst3q_p16 (poly16_t * __a, poly16x8x3_t __b)
11134 union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11135 __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11138 __extension__ static __inline void __attribute__ ((__always_inline__))
11139 vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c)
11141 union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11142 __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11145 __extension__ static __inline void __attribute__ ((__always_inline__))
11146 vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c)
11148 union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11149 __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11152 __extension__ static __inline void __attribute__ ((__always_inline__))
11153 vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
11155 union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11156 __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* vst3_lane_f16: forwards __a (uncast), the bits of __b and __c to
   __builtin_neon_vst3_lanev4hf.  Only defined when an __fp16 format macro
   is set.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
{
  union { float16x4x3_t __i; __builtin_neon_ei __o; } __val;

  __val.__i = __b;
  __builtin_neon_vst3_lanev4hf (__a, __val.__o, __c);
}
#endif
11168 __extension__ static __inline void __attribute__ ((__always_inline__))
11169 vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
11171 union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11172 __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
11175 __extension__ static __inline void __attribute__ ((__always_inline__))
11176 vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c)
11178 union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11179 __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11182 __extension__ static __inline void __attribute__ ((__always_inline__))
11183 vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c)
11185 union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11186 __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11189 __extension__ static __inline void __attribute__ ((__always_inline__))
11190 vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c)
11192 union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11193 __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
11196 __extension__ static __inline void __attribute__ ((__always_inline__))
11197 vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c)
11199 union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11200 __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11203 __extension__ static __inline void __attribute__ ((__always_inline__))
11204 vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c)
11206 union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
11207 __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11210 __extension__ static __inline void __attribute__ ((__always_inline__))
11211 vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c)
11213 union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11214 __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11217 __extension__ static __inline void __attribute__ ((__always_inline__))
11218 vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
11220 union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11221 __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
11224 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11225 __extension__ static __inline void __attribute__ ((__always_inline__))
11226 vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
11228 union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11229 __builtin_neon_vst3_lanev8hf (__a, __bu.__o, __c);
11231 #endif
11233 __extension__ static __inline void __attribute__ ((__always_inline__))
11234 vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
11236 union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11237 __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
11240 __extension__ static __inline void __attribute__ ((__always_inline__))
11241 vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c)
11243 union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11244 __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11247 __extension__ static __inline void __attribute__ ((__always_inline__))
11248 vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c)
11250 union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11251 __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
11254 __extension__ static __inline void __attribute__ ((__always_inline__))
11255 vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c)
11257 union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
11258 __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11261 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
11262 vld4_s8 (const int8_t * __a)
11264 union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11265 __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
11266 return __rv.__i;
11269 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
11270 vld4_s16 (const int16_t * __a)
11272 union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11273 __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
11274 return __rv.__i;
11277 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
11278 vld4_s32 (const int32_t * __a)
11280 union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11281 __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a);
11282 return __rv.__i;
11285 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11286 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
11287 vld4_f16 (const float16_t * __a)
11289 union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11290 __rv.__o = __builtin_neon_vld4v4hf (__a);
11291 return __rv.__i;
11293 #endif
11295 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
11296 vld4_f32 (const float32_t * __a)
11298 union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11299 __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a);
11300 return __rv.__i;
11303 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
11304 vld4_u8 (const uint8_t * __a)
11306 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11307 __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
11308 return __rv.__i;
11311 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
11312 vld4_u16 (const uint16_t * __a)
11314 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11315 __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
11316 return __rv.__i;
11319 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
11320 vld4_u32 (const uint32_t * __a)
11322 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11323 __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a);
11324 return __rv.__i;
11327 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
11328 vld4_p8 (const poly8_t * __a)
11330 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11331 __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
11332 return __rv.__i;
11335 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
11336 vld4_p16 (const poly16_t * __a)
11338 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11339 __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
11340 return __rv.__i;
11343 #pragma GCC push_options
11344 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
11345 __extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
11346 vld4_p64 (const poly64_t * __a)
11348 union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11349 __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a);
11350 return __rv.__i;
11353 #pragma GCC pop_options
11354 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
11355 vld4_s64 (const int64_t * __a)
11357 union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11358 __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a);
11359 return __rv.__i;
11362 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
11363 vld4_u64 (const uint64_t * __a)
11365 union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11366 __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a);
11367 return __rv.__i;
11370 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
11371 vld4q_s8 (const int8_t * __a)
11373 union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv;
11374 __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
11375 return __rv.__i;
11378 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
11379 vld4q_s16 (const int16_t * __a)
11381 union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11382 __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
11383 return __rv.__i;
11386 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
11387 vld4q_s32 (const int32_t * __a)
11389 union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11390 __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a);
11391 return __rv.__i;
11394 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11395 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
11396 vld4q_f16 (const float16_t * __a)
11398 union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11399 __rv.__o = __builtin_neon_vld4v8hf (__a);
11400 return __rv.__i;
11402 #endif
11404 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
11405 vld4q_f32 (const float32_t * __a)
11407 union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11408 __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a);
11409 return __rv.__i;
11412 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
11413 vld4q_u8 (const uint8_t * __a)
11415 union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv;
11416 __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
11417 return __rv.__i;
11420 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
11421 vld4q_u16 (const uint16_t * __a)
11423 union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11424 __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
11425 return __rv.__i;
11428 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
11429 vld4q_u32 (const uint32_t * __a)
11431 union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11432 __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a);
11433 return __rv.__i;
11436 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
11437 vld4q_p8 (const poly8_t * __a)
11439 union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv;
11440 __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
11441 return __rv.__i;
11444 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
11445 vld4q_p16 (const poly16_t * __a)
11447 union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11448 __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
11449 return __rv.__i;
11452 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
11453 vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c)
11455 union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11456 union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11457 __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
11458 return __rv.__i;
11461 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
11462 vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c)
11464 union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11465 union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11466 __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11467 return __rv.__i;
11470 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
11471 vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c)
11473 union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11474 union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11475 __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
11476 return __rv.__i;
11479 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11480 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
11481 vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c)
11483 union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11484 union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11485 __rv.__o = __builtin_neon_vld4_lanev4hf (__a,
11486 __bu.__o, __c);
11487 return __rv.__i;
11489 #endif
11491 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
11492 vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
11494 union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11495 union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11496 __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
11497 return __rv.__i;
11500 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
11501 vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c)
11503 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11504 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11505 __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
11506 return __rv.__i;
11509 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
11510 vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c)
11512 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11513 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11514 __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11515 return __rv.__i;
11518 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
11519 vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c)
11521 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11522 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11523 __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
11524 return __rv.__i;
11527 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
11528 vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c)
11530 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11531 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11532 __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
11533 return __rv.__i;
11536 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
11537 vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c)
11539 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11540 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11541 __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11542 return __rv.__i;
11545 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
11546 vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c)
11548 union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11549 union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11550 __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11551 return __rv.__i;
11554 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
11555 vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c)
11557 union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11558 union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11559 __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
11560 return __rv.__i;
11563 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11564 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
11565 vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c)
11567 union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11568 union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11569 __rv.__o = __builtin_neon_vld4_lanev8hf (__a,
11570 __bu.__o, __c);
11571 return __rv.__i;
11573 #endif
11575 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
11576 vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
11578 union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11579 union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11580 __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
11581 return __rv.__i;
11584 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
11585 vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c)
11587 union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11588 union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11589 __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11590 return __rv.__i;
11593 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
11594 vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c)
11596 union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11597 union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv;
11598 __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
11599 return __rv.__i;
11602 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
11603 vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c)
11605 union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11606 union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv;
11607 __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
11608 return __rv.__i;
11611 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
11612 vld4_dup_s8 (const int8_t * __a)
11614 union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11615 __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
11616 return __rv.__i;
11619 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
11620 vld4_dup_s16 (const int16_t * __a)
11622 union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11623 __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
11624 return __rv.__i;
11627 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
11628 vld4_dup_s32 (const int32_t * __a)
11630 union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11631 __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a);
11632 return __rv.__i;
11635 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11636 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
11637 vld4_dup_f16 (const float16_t * __a)
11639 union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11640 __rv.__o = __builtin_neon_vld4_dupv4hf (__a);
11641 return __rv.__i;
11643 #endif
11645 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
11646 vld4_dup_f32 (const float32_t * __a)
11648 union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11649 __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a);
11650 return __rv.__i;
11653 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
11654 vld4_dup_u8 (const uint8_t * __a)
11656 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11657 __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
11658 return __rv.__i;
11661 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
11662 vld4_dup_u16 (const uint16_t * __a)
11664 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11665 __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
11666 return __rv.__i;
11669 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
11670 vld4_dup_u32 (const uint32_t * __a)
11672 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
11673 __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a);
11674 return __rv.__i;
11677 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
11678 vld4_dup_p8 (const poly8_t * __a)
11680 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
11681 __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
11682 return __rv.__i;
11685 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
11686 vld4_dup_p16 (const poly16_t * __a)
11688 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
11689 __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
11690 return __rv.__i;
11693 #pragma GCC push_options
11694 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
11695 __extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__))
11696 vld4_dup_p64 (const poly64_t * __a)
11698 union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11699 __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a);
11700 return __rv.__i;
11703 #pragma GCC pop_options
11704 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
11705 vld4_dup_s64 (const int64_t * __a)
11707 union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11708 __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a);
11709 return __rv.__i;
11712 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
11713 vld4_dup_u64 (const uint64_t * __a)
11715 union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv;
11716 __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a);
11717 return __rv.__i;
11720 __extension__ static __inline void __attribute__ ((__always_inline__))
11721 vst4_s8 (int8_t * __a, int8x8x4_t __b)
11723 union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11724 __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
11727 __extension__ static __inline void __attribute__ ((__always_inline__))
11728 vst4_s16 (int16_t * __a, int16x4x4_t __b)
11730 union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11731 __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
11734 __extension__ static __inline void __attribute__ ((__always_inline__))
11735 vst4_s32 (int32_t * __a, int32x2x4_t __b)
11737 union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11738 __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
11741 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11742 __extension__ static __inline void __attribute__ ((__always_inline__))
11743 vst4_f16 (float16_t * __a, float16x4x4_t __b)
11745 union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11746 __builtin_neon_vst4v4hf (__a, __bu.__o);
11748 #endif
11750 __extension__ static __inline void __attribute__ ((__always_inline__))
11751 vst4_f32 (float32_t * __a, float32x2x4_t __b)
11753 union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11754 __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o);
11757 __extension__ static __inline void __attribute__ ((__always_inline__))
11758 vst4_u8 (uint8_t * __a, uint8x8x4_t __b)
11760 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11761 __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
11764 __extension__ static __inline void __attribute__ ((__always_inline__))
11765 vst4_u16 (uint16_t * __a, uint16x4x4_t __b)
11767 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11768 __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
11771 __extension__ static __inline void __attribute__ ((__always_inline__))
11772 vst4_u32 (uint32_t * __a, uint32x2x4_t __b)
11774 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11775 __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
11778 __extension__ static __inline void __attribute__ ((__always_inline__))
11779 vst4_p8 (poly8_t * __a, poly8x8x4_t __b)
11781 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11782 __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
11785 __extension__ static __inline void __attribute__ ((__always_inline__))
11786 vst4_p16 (poly16_t * __a, poly16x4x4_t __b)
11788 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11789 __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
11792 #pragma GCC push_options
11793 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
11794 __extension__ static __inline void __attribute__ ((__always_inline__))
11795 vst4_p64 (poly64_t * __a, poly64x1x4_t __b)
11797 union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11798 __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
11801 #pragma GCC pop_options
11802 __extension__ static __inline void __attribute__ ((__always_inline__))
11803 vst4_s64 (int64_t * __a, int64x1x4_t __b)
11805 union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11806 __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
11809 __extension__ static __inline void __attribute__ ((__always_inline__))
11810 vst4_u64 (uint64_t * __a, uint64x1x4_t __b)
11812 union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11813 __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
11816 __extension__ static __inline void __attribute__ ((__always_inline__))
11817 vst4q_s8 (int8_t * __a, int8x16x4_t __b)
11819 union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11820 __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11823 __extension__ static __inline void __attribute__ ((__always_inline__))
11824 vst4q_s16 (int16_t * __a, int16x8x4_t __b)
11826 union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11827 __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11830 __extension__ static __inline void __attribute__ ((__always_inline__))
11831 vst4q_s32 (int32_t * __a, int32x4x4_t __b)
11833 union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11834 __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
11837 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11838 __extension__ static __inline void __attribute__ ((__always_inline__))
11839 vst4q_f16 (float16_t * __a, float16x8x4_t __b)
11841 union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11842 __builtin_neon_vst4v8hf (__a, __bu.__o);
11844 #endif
11846 __extension__ static __inline void __attribute__ ((__always_inline__))
11847 vst4q_f32 (float32_t * __a, float32x4x4_t __b)
11849 union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11850 __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o);
11853 __extension__ static __inline void __attribute__ ((__always_inline__))
11854 vst4q_u8 (uint8_t * __a, uint8x16x4_t __b)
11856 union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11857 __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11860 __extension__ static __inline void __attribute__ ((__always_inline__))
11861 vst4q_u16 (uint16_t * __a, uint16x8x4_t __b)
11863 union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11864 __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11867 __extension__ static __inline void __attribute__ ((__always_inline__))
11868 vst4q_u32 (uint32_t * __a, uint32x4x4_t __b)
11870 union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11871 __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
11874 __extension__ static __inline void __attribute__ ((__always_inline__))
11875 vst4q_p8 (poly8_t * __a, poly8x16x4_t __b)
11877 union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11878 __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
11881 __extension__ static __inline void __attribute__ ((__always_inline__))
11882 vst4q_p16 (poly16_t * __a, poly16x8x4_t __b)
11884 union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11885 __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
11888 __extension__ static __inline void __attribute__ ((__always_inline__))
11889 vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c)
11891 union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11892 __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11895 __extension__ static __inline void __attribute__ ((__always_inline__))
11896 vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c)
11898 union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11899 __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11902 __extension__ static __inline void __attribute__ ((__always_inline__))
11903 vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c)
11905 union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11906 __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
11909 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11910 __extension__ static __inline void __attribute__ ((__always_inline__))
11911 vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c)
11913 union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11914 __builtin_neon_vst4_lanev4hf (__a, __bu.__o, __c);
11916 #endif
11918 __extension__ static __inline void __attribute__ ((__always_inline__))
11919 vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
11921 union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11922 __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
11925 __extension__ static __inline void __attribute__ ((__always_inline__))
11926 vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c)
11928 union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11929 __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11932 __extension__ static __inline void __attribute__ ((__always_inline__))
11933 vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c)
11935 union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11936 __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11939 __extension__ static __inline void __attribute__ ((__always_inline__))
11940 vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c)
11942 union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11943 __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
11946 __extension__ static __inline void __attribute__ ((__always_inline__))
11947 vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c)
11949 union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11950 __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
11953 __extension__ static __inline void __attribute__ ((__always_inline__))
11954 vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c)
11956 union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
11957 __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11960 __extension__ static __inline void __attribute__ ((__always_inline__))
11961 vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c)
11963 union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11964 __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11967 __extension__ static __inline void __attribute__ ((__always_inline__))
11968 vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c)
11970 union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11971 __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
11974 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
11975 __extension__ static __inline void __attribute__ ((__always_inline__))
11976 vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c)
11978 union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11979 __builtin_neon_vst4_lanev8hf (__a, __bu.__o, __c);
11981 #endif
11983 __extension__ static __inline void __attribute__ ((__always_inline__))
11984 vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
11986 union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11987 __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
11990 __extension__ static __inline void __attribute__ ((__always_inline__))
11991 vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c)
11993 union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
11994 __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
11997 __extension__ static __inline void __attribute__ ((__always_inline__))
11998 vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c)
12000 union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
12001 __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
12004 __extension__ static __inline void __attribute__ ((__always_inline__))
12005 vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c)
12007 union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
12008 __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
/* vand_<type>: bitwise AND.  Each function takes two operands of the same
   type and returns __a & __b in that type.  The variants below differ only
   in the operand type (s8, s16, s32, u8, u16, u32, s64, u64); the operation
   is written with the plain C '&' operator rather than a builtin call.  */
12011 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12012 vand_s8 (int8x8_t __a, int8x8_t __b)
12014 return __a & __b;
12017 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12018 vand_s16 (int16x4_t __a, int16x4_t __b)
12020 return __a & __b;
12023 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12024 vand_s32 (int32x2_t __a, int32x2_t __b)
12026 return __a & __b;
12029 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12030 vand_u8 (uint8x8_t __a, uint8x8_t __b)
12032 return __a & __b;
12035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12036 vand_u16 (uint16x4_t __a, uint16x4_t __b)
12038 return __a & __b;
12041 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12042 vand_u32 (uint32x2_t __a, uint32x2_t __b)
12044 return __a & __b;
12047 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12048 vand_s64 (int64x1_t __a, int64x1_t __b)
12050 return __a & __b;
12053 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12054 vand_u64 (uint64x1_t __a, uint64x1_t __b)
12056 return __a & __b;
/* vandq_<type>: bitwise AND on the "q" vector types.  Each function takes
   two operands of the same type and returns __a & __b in that type.  The
   variants below differ only in the operand type (s8, s16, s32, s64, u8,
   u16, u32, u64).  */
12059 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12060 vandq_s8 (int8x16_t __a, int8x16_t __b)
12062 return __a & __b;
12065 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12066 vandq_s16 (int16x8_t __a, int16x8_t __b)
12068 return __a & __b;
12071 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12072 vandq_s32 (int32x4_t __a, int32x4_t __b)
12074 return __a & __b;
12077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12078 vandq_s64 (int64x2_t __a, int64x2_t __b)
12080 return __a & __b;
12083 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12084 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
12086 return __a & __b;
12089 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12090 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
12092 return __a & __b;
12095 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12096 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
12098 return __a & __b;
12101 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12102 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
12104 return __a & __b;
/* vorr_<type>: bitwise inclusive OR.  Each function takes two operands of
   the same type and returns __a | __b in that type.  The variants below
   differ only in the operand type (s8, s16, s32, u8, u16, u32, s64, u64).  */
12107 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12108 vorr_s8 (int8x8_t __a, int8x8_t __b)
12110 return __a | __b;
12113 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12114 vorr_s16 (int16x4_t __a, int16x4_t __b)
12116 return __a | __b;
12119 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12120 vorr_s32 (int32x2_t __a, int32x2_t __b)
12122 return __a | __b;
12125 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12126 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
12128 return __a | __b;
12131 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12132 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
12134 return __a | __b;
12137 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12138 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
12140 return __a | __b;
12143 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12144 vorr_s64 (int64x1_t __a, int64x1_t __b)
12146 return __a | __b;
12149 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12150 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
12152 return __a | __b;
/* vorrq_<type>: bitwise inclusive OR on the "q" vector types.  Each function
   takes two operands of the same type and returns __a | __b in that type.
   The variants below differ only in the operand type (s8, s16, s32, s64, u8,
   u16, u32, u64).  */
12155 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12156 vorrq_s8 (int8x16_t __a, int8x16_t __b)
12158 return __a | __b;
12161 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12162 vorrq_s16 (int16x8_t __a, int16x8_t __b)
12164 return __a | __b;
12167 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12168 vorrq_s32 (int32x4_t __a, int32x4_t __b)
12170 return __a | __b;
12173 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12174 vorrq_s64 (int64x2_t __a, int64x2_t __b)
12176 return __a | __b;
12179 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12180 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
12182 return __a | __b;
12185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12186 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
12188 return __a | __b;
12191 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12192 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
12194 return __a | __b;
12197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12198 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
12200 return __a | __b;
/* veor_<type>: bitwise exclusive OR.  Each function takes two operands of
   the same type and returns __a ^ __b in that type.  The variants below
   differ only in the operand type (s8, s16, s32, u8, u16, u32, s64, u64).  */
12203 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12204 veor_s8 (int8x8_t __a, int8x8_t __b)
12206 return __a ^ __b;
12209 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12210 veor_s16 (int16x4_t __a, int16x4_t __b)
12212 return __a ^ __b;
12215 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12216 veor_s32 (int32x2_t __a, int32x2_t __b)
12218 return __a ^ __b;
12221 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12222 veor_u8 (uint8x8_t __a, uint8x8_t __b)
12224 return __a ^ __b;
12227 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12228 veor_u16 (uint16x4_t __a, uint16x4_t __b)
12230 return __a ^ __b;
12233 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12234 veor_u32 (uint32x2_t __a, uint32x2_t __b)
12236 return __a ^ __b;
12239 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12240 veor_s64 (int64x1_t __a, int64x1_t __b)
12242 return __a ^ __b;
12245 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12246 veor_u64 (uint64x1_t __a, uint64x1_t __b)
12248 return __a ^ __b;
/* veorq_<type>: bitwise exclusive OR on the "q" vector types.  Each function
   takes two operands of the same type and returns __a ^ __b in that type.
   The variants below differ only in the operand type (s8, s16, s32, s64, u8,
   u16, u32, u64).  */
12251 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12252 veorq_s8 (int8x16_t __a, int8x16_t __b)
12254 return __a ^ __b;
12257 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12258 veorq_s16 (int16x8_t __a, int16x8_t __b)
12260 return __a ^ __b;
12263 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12264 veorq_s32 (int32x4_t __a, int32x4_t __b)
12266 return __a ^ __b;
12269 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12270 veorq_s64 (int64x2_t __a, int64x2_t __b)
12272 return __a ^ __b;
12275 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12276 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
12278 return __a ^ __b;
12281 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12282 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
12284 return __a ^ __b;
12287 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12288 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
12290 return __a ^ __b;
12293 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12294 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
12296 return __a ^ __b;
/* vbic_<type>: bit clear.  Each function takes two operands of the same type
   and returns __a & ~__b, i.e. __a with every bit that is set in __b
   cleared.  Note that it is the second operand that is complemented.  The
   variants below differ only in the operand type (s8, s16, s32, u8, u16,
   u32, s64, u64).  */
12299 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12300 vbic_s8 (int8x8_t __a, int8x8_t __b)
12302 return __a & ~__b;
12305 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12306 vbic_s16 (int16x4_t __a, int16x4_t __b)
12308 return __a & ~__b;
12311 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12312 vbic_s32 (int32x2_t __a, int32x2_t __b)
12314 return __a & ~__b;
12317 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12318 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
12320 return __a & ~__b;
12323 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12324 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
12326 return __a & ~__b;
12329 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12330 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
12332 return __a & ~__b;
12335 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12336 vbic_s64 (int64x1_t __a, int64x1_t __b)
12338 return __a & ~__b;
12341 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12342 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
12344 return __a & ~__b;
/* vbicq_<type>: bit clear on the "q" vector types.  Each function takes two
   operands of the same type and returns __a & ~__b, i.e. __a with every bit
   that is set in __b cleared.  Note that it is the second operand that is
   complemented.  The variants below differ only in the operand type (s8,
   s16, s32, s64, u8, u16, u32, u64).  */
12347 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12348 vbicq_s8 (int8x16_t __a, int8x16_t __b)
12350 return __a & ~__b;
12353 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12354 vbicq_s16 (int16x8_t __a, int16x8_t __b)
12356 return __a & ~__b;
12359 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12360 vbicq_s32 (int32x4_t __a, int32x4_t __b)
12362 return __a & ~__b;
12365 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12366 vbicq_s64 (int64x2_t __a, int64x2_t __b)
12368 return __a & ~__b;
12371 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12372 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
12374 return __a & ~__b;
12377 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12378 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
12380 return __a & ~__b;
12383 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12384 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
12386 return __a & ~__b;
12389 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12390 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
12392 return __a & ~__b;
/* vorn_<type>: OR with complement.  Each function takes two operands of the
   same type and returns __a | ~__b.  Note that it is the second operand that
   is complemented.  The variants below differ only in the operand type (s8,
   s16, s32, u8, u16, u32, s64, u64).  */
12395 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12396 vorn_s8 (int8x8_t __a, int8x8_t __b)
12398 return __a | ~__b;
12401 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12402 vorn_s16 (int16x4_t __a, int16x4_t __b)
12404 return __a | ~__b;
12407 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12408 vorn_s32 (int32x2_t __a, int32x2_t __b)
12410 return __a | ~__b;
12413 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12414 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
12416 return __a | ~__b;
12419 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12420 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
12422 return __a | ~__b;
12425 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12426 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
12428 return __a | ~__b;
12431 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12432 vorn_s64 (int64x1_t __a, int64x1_t __b)
12434 return __a | ~__b;
12437 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12438 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
12440 return __a | ~__b;
/* vornq_<type>: OR with complement on the "q" vector types.  Each function
   takes two operands of the same type and returns __a | ~__b.  Note that it
   is the second operand that is complemented.  The variants below differ
   only in the operand type (s8, s16, s32, s64, u8, u16, u32, u64).  */
12443 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12444 vornq_s8 (int8x16_t __a, int8x16_t __b)
12446 return __a | ~__b;
12449 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12450 vornq_s16 (int16x8_t __a, int16x8_t __b)
12452 return __a | ~__b;
12455 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12456 vornq_s32 (int32x4_t __a, int32x4_t __b)
12458 return __a | ~__b;
12461 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12462 vornq_s64 (int64x2_t __a, int64x2_t __b)
12464 return __a | ~__b;
12467 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12468 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
12470 return __a | ~__b;
12473 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12474 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
12476 return __a | ~__b;
12479 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12480 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
12482 return __a | ~__b;
12485 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12486 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
12488 return __a | ~__b;
/* vreinterpret_p8_<src>: return the argument cast to poly8x8_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
12491 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12492 vreinterpret_p8_p16 (poly16x4_t __a)
12494 return (poly8x8_t) __a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12497 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12498 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12499 vreinterpret_p8_f16 (float16x4_t __a)
12501 return (poly8x8_t) __a;
12503 #endif
12505 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12506 vreinterpret_p8_f32 (float32x2_t __a)
12508 return (poly8x8_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
12511 #pragma GCC push_options
12512 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12513 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12514 vreinterpret_p8_p64 (poly64x1_t __a)
12516 return (poly8x8_t)__a;
12519 #pragma GCC pop_options
12520 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12521 vreinterpret_p8_s64 (int64x1_t __a)
12523 return (poly8x8_t)__a;
12526 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12527 vreinterpret_p8_u64 (uint64x1_t __a)
12529 return (poly8x8_t)__a;
12532 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12533 vreinterpret_p8_s8 (int8x8_t __a)
12535 return (poly8x8_t)__a;
12538 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12539 vreinterpret_p8_s16 (int16x4_t __a)
12541 return (poly8x8_t)__a;
12544 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12545 vreinterpret_p8_s32 (int32x2_t __a)
12547 return (poly8x8_t)__a;
12550 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12551 vreinterpret_p8_u8 (uint8x8_t __a)
12553 return (poly8x8_t)__a;
12556 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12557 vreinterpret_p8_u16 (uint16x4_t __a)
12559 return (poly8x8_t)__a;
12562 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12563 vreinterpret_p8_u32 (uint32x2_t __a)
12565 return (poly8x8_t)__a;
/* vreinterpret_p16_<src>: return the argument cast to poly16x4_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
12568 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12569 vreinterpret_p16_p8 (poly8x8_t __a)
12571 return (poly16x4_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12574 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12575 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12576 vreinterpret_p16_f16 (float16x4_t __a)
12578 return (poly16x4_t) __a;
12580 #endif
12582 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12583 vreinterpret_p16_f32 (float32x2_t __a)
12585 return (poly16x4_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
12588 #pragma GCC push_options
12589 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12590 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12591 vreinterpret_p16_p64 (poly64x1_t __a)
12593 return (poly16x4_t)__a;
12596 #pragma GCC pop_options
12597 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12598 vreinterpret_p16_s64 (int64x1_t __a)
12600 return (poly16x4_t)__a;
12603 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12604 vreinterpret_p16_u64 (uint64x1_t __a)
12606 return (poly16x4_t)__a;
12609 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12610 vreinterpret_p16_s8 (int8x8_t __a)
12612 return (poly16x4_t)__a;
12615 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12616 vreinterpret_p16_s16 (int16x4_t __a)
12618 return (poly16x4_t)__a;
12621 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12622 vreinterpret_p16_s32 (int32x2_t __a)
12624 return (poly16x4_t)__a;
12627 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12628 vreinterpret_p16_u8 (uint8x8_t __a)
12630 return (poly16x4_t)__a;
12633 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12634 vreinterpret_p16_u16 (uint16x4_t __a)
12636 return (poly16x4_t)__a;
12639 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12640 vreinterpret_p16_u32 (uint32x2_t __a)
12642 return (poly16x4_t)__a;
/* vreinterpret_f16_<src>: return the argument cast to float16x4_t.  Each
   body is a single C cast of __a to the return type; the variants differ
   only in the source type named by the suffix.  Every definition sits in its
   own #if/#endif pair, so none of them exists unless one of the
   __ARM_FP16_FORMAT_* macros is defined.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
12645 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12646 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12647 vreinterpret_f16_p8 (poly8x8_t __a)
12649 return (float16x4_t) __a;
12651 #endif
12653 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12654 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12655 vreinterpret_f16_p16 (poly16x4_t __a)
12657 return (float16x4_t) __a;
12659 #endif
12661 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12662 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12663 vreinterpret_f16_f32 (float32x2_t __a)
12665 return (float16x4_t) __a;
12667 #endif
/* The p64 source variant is additionally compiled with the
   crypto-neon-fp-armv8 FPU selected; the push/pop pair is nested inside the
   FP16 guard and limits that setting to this function.  */
12669 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12670 #pragma GCC push_options
12671 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12672 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12673 vreinterpret_f16_p64 (poly64x1_t __a)
12675 return (float16x4_t) __a;
12677 #pragma GCC pop_options
12678 #endif
12680 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12681 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12682 vreinterpret_f16_s64 (int64x1_t __a)
12684 return (float16x4_t) __a;
12686 #endif
12688 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12689 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12690 vreinterpret_f16_u64 (uint64x1_t __a)
12692 return (float16x4_t) __a;
12694 #endif
12696 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12697 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12698 vreinterpret_f16_s8 (int8x8_t __a)
12700 return (float16x4_t) __a;
12702 #endif
12704 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12705 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12706 vreinterpret_f16_s16 (int16x4_t __a)
12708 return (float16x4_t) __a;
12710 #endif
12712 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12713 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12714 vreinterpret_f16_s32 (int32x2_t __a)
12716 return (float16x4_t) __a;
12718 #endif
12720 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12721 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12722 vreinterpret_f16_u8 (uint8x8_t __a)
12724 return (float16x4_t) __a;
12726 #endif
12728 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12729 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12730 vreinterpret_f16_u16 (uint16x4_t __a)
12732 return (float16x4_t) __a;
12734 #endif
12736 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12737 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12738 vreinterpret_f16_u32 (uint32x2_t __a)
12740 return (float16x4_t) __a;
12742 #endif
/* vreinterpret_f32_<src>: return the argument cast to float32x2_t.  Each
   body is a single C cast of __a to the return type; the variants differ
   only in the source type named by the suffix.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted (no integer-to-float conversion); the type sizes
   are not visible in this chunk -- confirm.  */
12744 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12745 vreinterpret_f32_p8 (poly8x8_t __a)
12747 return (float32x2_t)__a;
12750 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12751 vreinterpret_f32_p16 (poly16x4_t __a)
12753 return (float32x2_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12756 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12757 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12758 vreinterpret_f32_f16 (float16x4_t __a)
12760 return (float32x2_t) __a;
12762 #endif
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
12764 #pragma GCC push_options
12765 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12766 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12767 vreinterpret_f32_p64 (poly64x1_t __a)
12769 return (float32x2_t)__a;
12772 #pragma GCC pop_options
12773 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12774 vreinterpret_f32_s64 (int64x1_t __a)
12776 return (float32x2_t)__a;
12779 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12780 vreinterpret_f32_u64 (uint64x1_t __a)
12782 return (float32x2_t)__a;
12785 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12786 vreinterpret_f32_s8 (int8x8_t __a)
12788 return (float32x2_t)__a;
12791 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12792 vreinterpret_f32_s16 (int16x4_t __a)
12794 return (float32x2_t)__a;
12797 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12798 vreinterpret_f32_s32 (int32x2_t __a)
12800 return (float32x2_t)__a;
12803 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12804 vreinterpret_f32_u8 (uint8x8_t __a)
12806 return (float32x2_t)__a;
12809 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12810 vreinterpret_f32_u16 (uint16x4_t __a)
12812 return (float32x2_t)__a;
12815 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12816 vreinterpret_f32_u32 (uint32x2_t __a)
12818 return (float32x2_t)__a;
/* vreinterpret_p64_<src>: return the argument cast to poly64x1_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.  Unlike the other vreinterpret
   families, the whole family sits inside one push_options / target
   ("fpu=crypto-neon-fp-armv8") / pop_options region, so every definition
   here is compiled with that FPU selected.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
12821 #pragma GCC push_options
12822 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12823 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12824 vreinterpret_p64_p8 (poly8x8_t __a)
12826 return (poly64x1_t)__a;
12829 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12830 vreinterpret_p64_p16 (poly16x4_t __a)
12832 return (poly64x1_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12835 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12836 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12837 vreinterpret_p64_f16 (float16x4_t __a)
12839 return (poly64x1_t) __a;
12841 #endif
12843 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12844 vreinterpret_p64_f32 (float32x2_t __a)
12846 return (poly64x1_t)__a;
12849 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12850 vreinterpret_p64_s64 (int64x1_t __a)
12852 return (poly64x1_t)__a;
12855 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12856 vreinterpret_p64_u64 (uint64x1_t __a)
12858 return (poly64x1_t)__a;
12861 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12862 vreinterpret_p64_s8 (int8x8_t __a)
12864 return (poly64x1_t)__a;
12867 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12868 vreinterpret_p64_s16 (int16x4_t __a)
12870 return (poly64x1_t)__a;
12873 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12874 vreinterpret_p64_s32 (int32x2_t __a)
12876 return (poly64x1_t)__a;
12879 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12880 vreinterpret_p64_u8 (uint8x8_t __a)
12882 return (poly64x1_t)__a;
12885 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12886 vreinterpret_p64_u16 (uint16x4_t __a)
12888 return (poly64x1_t)__a;
12891 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
12892 vreinterpret_p64_u32 (uint32x2_t __a)
12894 return (poly64x1_t)__a;
12897 #pragma GCC pop_options
/* vreinterpret_s64_<src>: return the argument cast to int64x1_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.
   NOTE(review): the vreinterpret name implies the bits are reinterpreted
   rather than the values converted; the type sizes are not visible in this
   chunk -- confirm.  */
12898 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12899 vreinterpret_s64_p8 (poly8x8_t __a)
12901 return (int64x1_t)__a;
12904 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12905 vreinterpret_s64_p16 (poly16x4_t __a)
12907 return (int64x1_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12910 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12911 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12912 vreinterpret_s64_f16 (float16x4_t __a)
12914 return (int64x1_t) __a;
12916 #endif
12918 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12919 vreinterpret_s64_f32 (float32x2_t __a)
12921 return (int64x1_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
12924 #pragma GCC push_options
12925 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
12926 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12927 vreinterpret_s64_p64 (poly64x1_t __a)
12929 return (int64x1_t)__a;
12932 #pragma GCC pop_options
12933 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12934 vreinterpret_s64_u64 (uint64x1_t __a)
12936 return (int64x1_t)__a;
12939 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12940 vreinterpret_s64_s8 (int8x8_t __a)
12942 return (int64x1_t)__a;
12945 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12946 vreinterpret_s64_s16 (int16x4_t __a)
12948 return (int64x1_t)__a;
12951 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12952 vreinterpret_s64_s32 (int32x2_t __a)
12954 return (int64x1_t)__a;
12957 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12958 vreinterpret_s64_u8 (uint8x8_t __a)
12960 return (int64x1_t)__a;
12963 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12964 vreinterpret_s64_u16 (uint16x4_t __a)
12966 return (int64x1_t)__a;
12969 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12970 vreinterpret_s64_u32 (uint32x2_t __a)
12972 return (int64x1_t)__a;
/* vreinterpret_u64_<src>: return the argument cast to uint64x1_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.
   NOTE(review): the vreinterpret name implies the bits are reinterpreted
   rather than the values converted; the type sizes are not visible in this
   chunk -- confirm.  */
12975 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12976 vreinterpret_u64_p8 (poly8x8_t __a)
12978 return (uint64x1_t)__a;
12981 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12982 vreinterpret_u64_p16 (poly16x4_t __a)
12984 return (uint64x1_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
12987 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
12988 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12989 vreinterpret_u64_f16 (float16x4_t __a)
12991 return (uint64x1_t) __a;
12993 #endif
12995 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12996 vreinterpret_u64_f32 (float32x2_t __a)
12998 return (uint64x1_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
13001 #pragma GCC push_options
13002 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13004 vreinterpret_u64_p64 (poly64x1_t __a)
13006 return (uint64x1_t)__a;
13009 #pragma GCC pop_options
13010 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13011 vreinterpret_u64_s64 (int64x1_t __a)
13013 return (uint64x1_t)__a;
13016 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13017 vreinterpret_u64_s8 (int8x8_t __a)
13019 return (uint64x1_t)__a;
13022 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13023 vreinterpret_u64_s16 (int16x4_t __a)
13025 return (uint64x1_t)__a;
13028 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13029 vreinterpret_u64_s32 (int32x2_t __a)
13031 return (uint64x1_t)__a;
13034 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13035 vreinterpret_u64_u8 (uint8x8_t __a)
13037 return (uint64x1_t)__a;
13040 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13041 vreinterpret_u64_u16 (uint16x4_t __a)
13043 return (uint64x1_t)__a;
13046 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13047 vreinterpret_u64_u32 (uint32x2_t __a)
13049 return (uint64x1_t)__a;
/* vreinterpret_s8_<src>: return the argument cast to int8x8_t.  Each body is
   a single C cast of __a to the return type; the variants differ only in the
   source type named by the suffix.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
13052 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13053 vreinterpret_s8_p8 (poly8x8_t __a)
13055 return (int8x8_t)__a;
13058 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13059 vreinterpret_s8_p16 (poly16x4_t __a)
13061 return (int8x8_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
13064 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13065 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13066 vreinterpret_s8_f16 (float16x4_t __a)
13068 return (int8x8_t) __a;
13070 #endif
13072 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13073 vreinterpret_s8_f32 (float32x2_t __a)
13075 return (int8x8_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
13078 #pragma GCC push_options
13079 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13080 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13081 vreinterpret_s8_p64 (poly64x1_t __a)
13083 return (int8x8_t)__a;
13086 #pragma GCC pop_options
13087 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13088 vreinterpret_s8_s64 (int64x1_t __a)
13090 return (int8x8_t)__a;
13093 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13094 vreinterpret_s8_u64 (uint64x1_t __a)
13096 return (int8x8_t)__a;
13099 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13100 vreinterpret_s8_s16 (int16x4_t __a)
13102 return (int8x8_t)__a;
13105 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13106 vreinterpret_s8_s32 (int32x2_t __a)
13108 return (int8x8_t)__a;
13111 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13112 vreinterpret_s8_u8 (uint8x8_t __a)
13114 return (int8x8_t)__a;
13117 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13118 vreinterpret_s8_u16 (uint16x4_t __a)
13120 return (int8x8_t)__a;
13123 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13124 vreinterpret_s8_u32 (uint32x2_t __a)
13126 return (int8x8_t)__a;
/* vreinterpret_s16_<src>: return the argument cast to int16x4_t.  Each body
   is a single C cast of __a to the return type; the variants differ only in
   the source type named by the suffix.
   NOTE(review): the vreinterpret name, together with GCC's rule for casts
   between same-sized vector types, implies the bits are reinterpreted rather
   than the values converted; the type sizes are not visible in this chunk --
   confirm.  */
13129 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13130 vreinterpret_s16_p8 (poly8x8_t __a)
13132 return (int16x4_t)__a;
13135 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13136 vreinterpret_s16_p16 (poly16x4_t __a)
13138 return (int16x4_t)__a;
/* The f16 source variant exists only when an FP16 format macro is defined.  */
13141 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13142 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13143 vreinterpret_s16_f16 (float16x4_t __a)
13145 return (int16x4_t) __a;
13147 #endif
13149 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13150 vreinterpret_s16_f32 (float32x2_t __a)
13152 return (int16x4_t)__a;
/* The p64 source variant is compiled with the crypto-neon-fp-armv8 FPU
   selected; push_options/pop_options limit that setting to this function.  */
13155 #pragma GCC push_options
13156 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13157 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13158 vreinterpret_s16_p64 (poly64x1_t __a)
13160 return (int16x4_t)__a;
13163 #pragma GCC pop_options
13164 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13165 vreinterpret_s16_s64 (int64x1_t __a)
13167 return (int16x4_t)__a;
13170 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13171 vreinterpret_s16_u64 (uint64x1_t __a)
13173 return (int16x4_t)__a;
13176 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13177 vreinterpret_s16_s8 (int8x8_t __a)
13179 return (int16x4_t)__a;
13182 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13183 vreinterpret_s16_s32 (int32x2_t __a)
13185 return (int16x4_t)__a;
13188 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13189 vreinterpret_s16_u8 (uint8x8_t __a)
13191 return (int16x4_t)__a;
13194 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13195 vreinterpret_s16_u16 (uint16x4_t __a)
13197 return (int16x4_t)__a;
13200 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13201 vreinterpret_s16_u32 (uint32x2_t __a)
13203 return (int16x4_t)__a;
13206 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13207 vreinterpret_s32_p8 (poly8x8_t __a)
13209 return (int32x2_t)__a;
13212 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13213 vreinterpret_s32_p16 (poly16x4_t __a)
13215 return (int32x2_t)__a;
13218 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13219 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13220 vreinterpret_s32_f16 (float16x4_t __a)
13222 return (int32x2_t) __a;
13224 #endif
13226 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13227 vreinterpret_s32_f32 (float32x2_t __a)
13229 return (int32x2_t)__a;
13232 #pragma GCC push_options
13233 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13234 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13235 vreinterpret_s32_p64 (poly64x1_t __a)
13237 return (int32x2_t)__a;
13240 #pragma GCC pop_options
13241 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13242 vreinterpret_s32_s64 (int64x1_t __a)
13244 return (int32x2_t)__a;
13247 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13248 vreinterpret_s32_u64 (uint64x1_t __a)
13250 return (int32x2_t)__a;
13253 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13254 vreinterpret_s32_s8 (int8x8_t __a)
13256 return (int32x2_t)__a;
13259 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13260 vreinterpret_s32_s16 (int16x4_t __a)
13262 return (int32x2_t)__a;
13265 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13266 vreinterpret_s32_u8 (uint8x8_t __a)
13268 return (int32x2_t)__a;
13271 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13272 vreinterpret_s32_u16 (uint16x4_t __a)
13274 return (int32x2_t)__a;
13277 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13278 vreinterpret_s32_u32 (uint32x2_t __a)
13280 return (int32x2_t)__a;
13283 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13284 vreinterpret_u8_p8 (poly8x8_t __a)
13286 return (uint8x8_t)__a;
13289 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13290 vreinterpret_u8_p16 (poly16x4_t __a)
13292 return (uint8x8_t)__a;
13295 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13296 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13297 vreinterpret_u8_f16 (float16x4_t __a)
13299 return (uint8x8_t) __a;
13301 #endif
13303 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13304 vreinterpret_u8_f32 (float32x2_t __a)
13306 return (uint8x8_t)__a;
13309 #pragma GCC push_options
13310 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13311 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13312 vreinterpret_u8_p64 (poly64x1_t __a)
13314 return (uint8x8_t)__a;
13317 #pragma GCC pop_options
13318 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13319 vreinterpret_u8_s64 (int64x1_t __a)
13321 return (uint8x8_t)__a;
13324 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13325 vreinterpret_u8_u64 (uint64x1_t __a)
13327 return (uint8x8_t)__a;
13330 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13331 vreinterpret_u8_s8 (int8x8_t __a)
13333 return (uint8x8_t)__a;
13336 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13337 vreinterpret_u8_s16 (int16x4_t __a)
13339 return (uint8x8_t)__a;
13342 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13343 vreinterpret_u8_s32 (int32x2_t __a)
13345 return (uint8x8_t)__a;
13348 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13349 vreinterpret_u8_u16 (uint16x4_t __a)
13351 return (uint8x8_t)__a;
13354 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13355 vreinterpret_u8_u32 (uint32x2_t __a)
13357 return (uint8x8_t)__a;
13360 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13361 vreinterpret_u16_p8 (poly8x8_t __a)
13363 return (uint16x4_t)__a;
13366 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13367 vreinterpret_u16_p16 (poly16x4_t __a)
13369 return (uint16x4_t)__a;
13372 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13373 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13374 vreinterpret_u16_f16 (float16x4_t __a)
13376 return (uint16x4_t) __a;
13378 #endif
13380 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13381 vreinterpret_u16_f32 (float32x2_t __a)
13383 return (uint16x4_t)__a;
13386 #pragma GCC push_options
13387 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13388 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13389 vreinterpret_u16_p64 (poly64x1_t __a)
13391 return (uint16x4_t)__a;
13394 #pragma GCC pop_options
13395 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13396 vreinterpret_u16_s64 (int64x1_t __a)
13398 return (uint16x4_t)__a;
13401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13402 vreinterpret_u16_u64 (uint64x1_t __a)
13404 return (uint16x4_t)__a;
13407 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13408 vreinterpret_u16_s8 (int8x8_t __a)
13410 return (uint16x4_t)__a;
13413 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13414 vreinterpret_u16_s16 (int16x4_t __a)
13416 return (uint16x4_t)__a;
13419 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13420 vreinterpret_u16_s32 (int32x2_t __a)
13422 return (uint16x4_t)__a;
13425 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13426 vreinterpret_u16_u8 (uint8x8_t __a)
13428 return (uint16x4_t)__a;
13431 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13432 vreinterpret_u16_u32 (uint32x2_t __a)
13434 return (uint16x4_t)__a;
13437 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13438 vreinterpret_u32_p8 (poly8x8_t __a)
13440 return (uint32x2_t)__a;
13443 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13444 vreinterpret_u32_p16 (poly16x4_t __a)
13446 return (uint32x2_t)__a;
13449 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13450 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13451 vreinterpret_u32_f16 (float16x4_t __a)
13453 return (uint32x2_t) __a;
13455 #endif
13457 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13458 vreinterpret_u32_f32 (float32x2_t __a)
13460 return (uint32x2_t)__a;
13463 #pragma GCC push_options
13464 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13466 vreinterpret_u32_p64 (poly64x1_t __a)
13468 return (uint32x2_t)__a;
13471 #pragma GCC pop_options
13472 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13473 vreinterpret_u32_s64 (int64x1_t __a)
13475 return (uint32x2_t)__a;
13478 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13479 vreinterpret_u32_u64 (uint64x1_t __a)
13481 return (uint32x2_t)__a;
13484 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13485 vreinterpret_u32_s8 (int8x8_t __a)
13487 return (uint32x2_t)__a;
13490 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13491 vreinterpret_u32_s16 (int16x4_t __a)
13493 return (uint32x2_t)__a;
13496 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13497 vreinterpret_u32_s32 (int32x2_t __a)
13499 return (uint32x2_t)__a;
13502 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13503 vreinterpret_u32_u8 (uint8x8_t __a)
13505 return (uint32x2_t)__a;
13508 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13509 vreinterpret_u32_u16 (uint16x4_t __a)
13511 return (uint32x2_t)__a;
13514 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13515 vreinterpretq_p8_p16 (poly16x8_t __a)
13517 return (poly8x16_t)__a;
13520 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13521 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13522 vreinterpretq_p8_f16 (float16x8_t __a)
13524 return (poly8x16_t) __a;
13526 #endif
13528 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13529 vreinterpretq_p8_f32 (float32x4_t __a)
13531 return (poly8x16_t)__a;
13534 #pragma GCC push_options
13535 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13536 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13537 vreinterpretq_p8_p64 (poly64x2_t __a)
13539 return (poly8x16_t)__a;
13543 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13544 vreinterpretq_p8_p128 (poly128_t __a)
13546 return (poly8x16_t)__a;
13549 #pragma GCC pop_options
13550 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13551 vreinterpretq_p8_s64 (int64x2_t __a)
13553 return (poly8x16_t)__a;
13556 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13557 vreinterpretq_p8_u64 (uint64x2_t __a)
13559 return (poly8x16_t)__a;
13562 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13563 vreinterpretq_p8_s8 (int8x16_t __a)
13565 return (poly8x16_t)__a;
13568 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13569 vreinterpretq_p8_s16 (int16x8_t __a)
13571 return (poly8x16_t)__a;
13574 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13575 vreinterpretq_p8_s32 (int32x4_t __a)
13577 return (poly8x16_t)__a;
13580 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13581 vreinterpretq_p8_u8 (uint8x16_t __a)
13583 return (poly8x16_t)__a;
13586 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13587 vreinterpretq_p8_u16 (uint16x8_t __a)
13589 return (poly8x16_t)__a;
13592 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13593 vreinterpretq_p8_u32 (uint32x4_t __a)
13595 return (poly8x16_t)__a;
13598 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13599 vreinterpretq_p16_p8 (poly8x16_t __a)
13601 return (poly16x8_t)__a;
13604 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13605 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13606 vreinterpretq_p16_f16 (float16x8_t __a)
13608 return (poly16x8_t) __a;
13610 #endif
13612 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13613 vreinterpretq_p16_f32 (float32x4_t __a)
13615 return (poly16x8_t)__a;
13618 #pragma GCC push_options
13619 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13620 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13621 vreinterpretq_p16_p64 (poly64x2_t __a)
13623 return (poly16x8_t)__a;
13626 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13627 vreinterpretq_p16_p128 (poly128_t __a)
13629 return (poly16x8_t)__a;
13632 #pragma GCC pop_options
13633 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13634 vreinterpretq_p16_s64 (int64x2_t __a)
13636 return (poly16x8_t)__a;
13639 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13640 vreinterpretq_p16_u64 (uint64x2_t __a)
13642 return (poly16x8_t)__a;
13645 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13646 vreinterpretq_p16_s8 (int8x16_t __a)
13648 return (poly16x8_t)__a;
13651 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13652 vreinterpretq_p16_s16 (int16x8_t __a)
13654 return (poly16x8_t)__a;
13657 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13658 vreinterpretq_p16_s32 (int32x4_t __a)
13660 return (poly16x8_t)__a;
13663 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13664 vreinterpretq_p16_u8 (uint8x16_t __a)
13666 return (poly16x8_t)__a;
13669 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13670 vreinterpretq_p16_u16 (uint16x8_t __a)
13672 return (poly16x8_t)__a;
13675 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13676 vreinterpretq_p16_u32 (uint32x4_t __a)
13678 return (poly16x8_t)__a;
/* vreinterpretq_f16_<src> family: each function returns its argument __a
   cast to float16x8_t.  Every body is a single C cast and nothing else.
   Every function here returns float16x8_t, so the whole family sits under
   one FP16-format guard; the condition is the one that guards the
   float16x4_t typedef at the top of this file.
   NOTE(review): presumably a same-size bit reinterpretation under GCC's
   vector-cast rules; confirm against the vector typedefs.  */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p8 (poly8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p16 (poly16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_f32 (float32x4_t __a)
{
  return (float16x8_t) __a;
}

/* The poly64 and poly128 variants are compiled with the
   crypto-neon-fp-armv8 FPU target selected; the previous options are
   restored right after them.  */
#pragma GCC push_options
#pragma GCC target ("fpu=crypto-neon-fp-armv8")

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p64 (poly64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_p128 (poly128_t __a)
{
  return (float16x8_t) __a;
}

#pragma GCC pop_options

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s64 (int64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u64 (uint64x2_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s8 (int8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s16 (int16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_s32 (int32x4_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u8 (uint8x16_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u16 (uint16x8_t __a)
{
  return (float16x8_t) __a;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vreinterpretq_f16_u32 (uint32x4_t __a)
{
  return (float16x8_t) __a;
}
#endif

13790 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13791 vreinterpretq_f32_p8 (poly8x16_t __a)
13793 return (float32x4_t)__a;
13796 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13797 vreinterpretq_f32_p16 (poly16x8_t __a)
13799 return (float32x4_t)__a;
13802 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13803 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13804 vreinterpretq_f32_f16 (float16x8_t __a)
13806 return (float32x4_t) __a;
13808 #endif
13810 #pragma GCC push_options
13811 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13812 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13813 vreinterpretq_f32_p64 (poly64x2_t __a)
13815 return (float32x4_t)__a;
13818 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13819 vreinterpretq_f32_p128 (poly128_t __a)
13821 return (float32x4_t)__a;
13824 #pragma GCC pop_options
13825 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13826 vreinterpretq_f32_s64 (int64x2_t __a)
13828 return (float32x4_t)__a;
13831 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13832 vreinterpretq_f32_u64 (uint64x2_t __a)
13834 return (float32x4_t)__a;
13837 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13838 vreinterpretq_f32_s8 (int8x16_t __a)
13840 return (float32x4_t)__a;
13843 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13844 vreinterpretq_f32_s16 (int16x8_t __a)
13846 return (float32x4_t)__a;
13849 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13850 vreinterpretq_f32_s32 (int32x4_t __a)
13852 return (float32x4_t)__a;
13855 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13856 vreinterpretq_f32_u8 (uint8x16_t __a)
13858 return (float32x4_t)__a;
13861 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13862 vreinterpretq_f32_u16 (uint16x8_t __a)
13864 return (float32x4_t)__a;
13867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13868 vreinterpretq_f32_u32 (uint32x4_t __a)
13870 return (float32x4_t)__a;
13873 #pragma GCC push_options
13874 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
13875 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13876 vreinterpretq_p64_p8 (poly8x16_t __a)
13878 return (poly64x2_t)__a;
13881 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13882 vreinterpretq_p64_p16 (poly16x8_t __a)
13884 return (poly64x2_t)__a;
13887 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13888 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13889 vreinterpretq_p64_f16 (float16x8_t __a)
13891 return (poly64x2_t) __a;
13893 #endif
13895 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13896 vreinterpretq_p64_f32 (float32x4_t __a)
13898 return (poly64x2_t)__a;
13901 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13902 vreinterpretq_p64_p128 (poly128_t __a)
13904 return (poly64x2_t)__a;
13907 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13908 vreinterpretq_p64_s64 (int64x2_t __a)
13910 return (poly64x2_t)__a;
13913 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13914 vreinterpretq_p64_u64 (uint64x2_t __a)
13916 return (poly64x2_t)__a;
13919 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13920 vreinterpretq_p64_s8 (int8x16_t __a)
13922 return (poly64x2_t)__a;
13925 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13926 vreinterpretq_p64_s16 (int16x8_t __a)
13928 return (poly64x2_t)__a;
13931 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13932 vreinterpretq_p64_s32 (int32x4_t __a)
13934 return (poly64x2_t)__a;
13937 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13938 vreinterpretq_p64_u8 (uint8x16_t __a)
13940 return (poly64x2_t)__a;
13943 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13944 vreinterpretq_p64_u16 (uint16x8_t __a)
13946 return (poly64x2_t)__a;
13949 __extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
13950 vreinterpretq_p64_u32 (uint32x4_t __a)
13952 return (poly64x2_t)__a;
13955 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13956 vreinterpretq_p128_p8 (poly8x16_t __a)
13958 return (poly128_t)__a;
13961 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13962 vreinterpretq_p128_p16 (poly16x8_t __a)
13964 return (poly128_t)__a;
13967 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
13968 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13969 vreinterpretq_p128_f16 (float16x8_t __a)
13971 return (poly128_t) __a;
13973 #endif
13975 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13976 vreinterpretq_p128_f32 (float32x4_t __a)
13978 return (poly128_t)__a;
13981 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13982 vreinterpretq_p128_p64 (poly64x2_t __a)
13984 return (poly128_t)__a;
13987 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13988 vreinterpretq_p128_s64 (int64x2_t __a)
13990 return (poly128_t)__a;
13993 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
13994 vreinterpretq_p128_u64 (uint64x2_t __a)
13996 return (poly128_t)__a;
13999 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14000 vreinterpretq_p128_s8 (int8x16_t __a)
14002 return (poly128_t)__a;
14005 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14006 vreinterpretq_p128_s16 (int16x8_t __a)
14008 return (poly128_t)__a;
14011 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14012 vreinterpretq_p128_s32 (int32x4_t __a)
14014 return (poly128_t)__a;
14017 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14018 vreinterpretq_p128_u8 (uint8x16_t __a)
14020 return (poly128_t)__a;
14023 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14024 vreinterpretq_p128_u16 (uint16x8_t __a)
14026 return (poly128_t)__a;
14029 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14030 vreinterpretq_p128_u32 (uint32x4_t __a)
14032 return (poly128_t)__a;
14035 #pragma GCC pop_options
14036 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14037 vreinterpretq_s64_p8 (poly8x16_t __a)
14039 return (int64x2_t)__a;
14042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14043 vreinterpretq_s64_p16 (poly16x8_t __a)
14045 return (int64x2_t)__a;
14048 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14049 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14050 vreinterpretq_s64_f16 (float16x8_t __a)
14052 return (int64x2_t) __a;
14054 #endif
14056 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14057 vreinterpretq_s64_f32 (float32x4_t __a)
14059 return (int64x2_t)__a;
14062 #pragma GCC push_options
14063 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14064 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14065 vreinterpretq_s64_p64 (poly64x2_t __a)
14067 return (int64x2_t)__a;
14070 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14071 vreinterpretq_s64_p128 (poly128_t __a)
14073 return (int64x2_t)__a;
14076 #pragma GCC pop_options
14077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14078 vreinterpretq_s64_u64 (uint64x2_t __a)
14080 return (int64x2_t)__a;
14083 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14084 vreinterpretq_s64_s8 (int8x16_t __a)
14086 return (int64x2_t)__a;
14089 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14090 vreinterpretq_s64_s16 (int16x8_t __a)
14092 return (int64x2_t)__a;
14095 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14096 vreinterpretq_s64_s32 (int32x4_t __a)
14098 return (int64x2_t)__a;
14101 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14102 vreinterpretq_s64_u8 (uint8x16_t __a)
14104 return (int64x2_t)__a;
14107 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14108 vreinterpretq_s64_u16 (uint16x8_t __a)
14110 return (int64x2_t)__a;
14113 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14114 vreinterpretq_s64_u32 (uint32x4_t __a)
14116 return (int64x2_t)__a;
14119 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14120 vreinterpretq_u64_p8 (poly8x16_t __a)
14122 return (uint64x2_t)__a;
14125 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14126 vreinterpretq_u64_p16 (poly16x8_t __a)
14128 return (uint64x2_t)__a;
14131 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14132 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14133 vreinterpretq_u64_f16 (float16x8_t __a)
14135 return (uint64x2_t) __a;
14137 #endif
14139 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14140 vreinterpretq_u64_f32 (float32x4_t __a)
14142 return (uint64x2_t)__a;
14145 #pragma GCC push_options
14146 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14147 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14148 vreinterpretq_u64_p64 (poly64x2_t __a)
14150 return (uint64x2_t)__a;
14153 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14154 vreinterpretq_u64_p128 (poly128_t __a)
14156 return (uint64x2_t)__a;
14159 #pragma GCC pop_options
14160 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14161 vreinterpretq_u64_s64 (int64x2_t __a)
14163 return (uint64x2_t)__a;
14166 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14167 vreinterpretq_u64_s8 (int8x16_t __a)
14169 return (uint64x2_t)__a;
14172 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14173 vreinterpretq_u64_s16 (int16x8_t __a)
14175 return (uint64x2_t)__a;
14178 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14179 vreinterpretq_u64_s32 (int32x4_t __a)
14181 return (uint64x2_t)__a;
14184 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14185 vreinterpretq_u64_u8 (uint8x16_t __a)
14187 return (uint64x2_t)__a;
14190 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14191 vreinterpretq_u64_u16 (uint16x8_t __a)
14193 return (uint64x2_t)__a;
14196 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14197 vreinterpretq_u64_u32 (uint32x4_t __a)
14199 return (uint64x2_t)__a;
14202 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14203 vreinterpretq_s8_p8 (poly8x16_t __a)
14205 return (int8x16_t)__a;
14208 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14209 vreinterpretq_s8_p16 (poly16x8_t __a)
14211 return (int8x16_t)__a;
14214 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14215 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14216 vreinterpretq_s8_f16 (float16x8_t __a)
14218 return (int8x16_t) __a;
14220 #endif
14222 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14223 vreinterpretq_s8_f32 (float32x4_t __a)
14225 return (int8x16_t)__a;
14228 #pragma GCC push_options
14229 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14230 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14231 vreinterpretq_s8_p64 (poly64x2_t __a)
14233 return (int8x16_t)__a;
14236 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14237 vreinterpretq_s8_p128 (poly128_t __a)
14239 return (int8x16_t)__a;
14242 #pragma GCC pop_options
14243 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14244 vreinterpretq_s8_s64 (int64x2_t __a)
14246 return (int8x16_t)__a;
14249 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14250 vreinterpretq_s8_u64 (uint64x2_t __a)
14252 return (int8x16_t)__a;
14255 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14256 vreinterpretq_s8_s16 (int16x8_t __a)
14258 return (int8x16_t)__a;
14261 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14262 vreinterpretq_s8_s32 (int32x4_t __a)
14264 return (int8x16_t)__a;
14267 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14268 vreinterpretq_s8_u8 (uint8x16_t __a)
14270 return (int8x16_t)__a;
14273 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14274 vreinterpretq_s8_u16 (uint16x8_t __a)
14276 return (int8x16_t)__a;
14279 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14280 vreinterpretq_s8_u32 (uint32x4_t __a)
14282 return (int8x16_t)__a;
14285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14286 vreinterpretq_s16_p8 (poly8x16_t __a)
14288 return (int16x8_t)__a;
14291 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14292 vreinterpretq_s16_p16 (poly16x8_t __a)
14294 return (int16x8_t)__a;
14297 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14298 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14299 vreinterpretq_s16_f16 (float16x8_t __a)
14301 return (int16x8_t) __a;
14303 #endif
14305 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14306 vreinterpretq_s16_f32 (float32x4_t __a)
14308 return (int16x8_t)__a;
14311 #pragma GCC push_options
14312 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14314 vreinterpretq_s16_p64 (poly64x2_t __a)
14316 return (int16x8_t)__a;
14319 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14320 vreinterpretq_s16_p128 (poly128_t __a)
14322 return (int16x8_t)__a;
14325 #pragma GCC pop_options
14326 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14327 vreinterpretq_s16_s64 (int64x2_t __a)
14329 return (int16x8_t)__a;
14332 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14333 vreinterpretq_s16_u64 (uint64x2_t __a)
14335 return (int16x8_t)__a;
14338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14339 vreinterpretq_s16_s8 (int8x16_t __a)
14341 return (int16x8_t)__a;
14344 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14345 vreinterpretq_s16_s32 (int32x4_t __a)
14347 return (int16x8_t)__a;
14350 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14351 vreinterpretq_s16_u8 (uint8x16_t __a)
14353 return (int16x8_t)__a;
14356 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14357 vreinterpretq_s16_u16 (uint16x8_t __a)
14359 return (int16x8_t)__a;
14362 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14363 vreinterpretq_s16_u32 (uint32x4_t __a)
14365 return (int16x8_t)__a;
14368 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14369 vreinterpretq_s32_p8 (poly8x16_t __a)
14371 return (int32x4_t)__a;
14374 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14375 vreinterpretq_s32_p16 (poly16x8_t __a)
14377 return (int32x4_t)__a;
14380 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14381 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14382 vreinterpretq_s32_f16 (float16x8_t __a)
14384 return (int32x4_t)__a;
14386 #endif
14388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14389 vreinterpretq_s32_f32 (float32x4_t __a)
14391 return (int32x4_t)__a;
14394 #pragma GCC push_options
14395 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14396 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14397 vreinterpretq_s32_p64 (poly64x2_t __a)
14399 return (int32x4_t)__a;
14402 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14403 vreinterpretq_s32_p128 (poly128_t __a)
14405 return (int32x4_t)__a;
14408 #pragma GCC pop_options
14409 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14410 vreinterpretq_s32_s64 (int64x2_t __a)
14412 return (int32x4_t)__a;
14415 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14416 vreinterpretq_s32_u64 (uint64x2_t __a)
14418 return (int32x4_t)__a;
14421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14422 vreinterpretq_s32_s8 (int8x16_t __a)
14424 return (int32x4_t)__a;
14427 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14428 vreinterpretq_s32_s16 (int16x8_t __a)
14430 return (int32x4_t)__a;
14433 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14434 vreinterpretq_s32_u8 (uint8x16_t __a)
14436 return (int32x4_t)__a;
14439 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14440 vreinterpretq_s32_u16 (uint16x8_t __a)
14442 return (int32x4_t)__a;
14445 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14446 vreinterpretq_s32_u32 (uint32x4_t __a)
14448 return (int32x4_t)__a;
14451 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14452 vreinterpretq_u8_p8 (poly8x16_t __a)
14454 return (uint8x16_t)__a;
14457 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14458 vreinterpretq_u8_p16 (poly16x8_t __a)
14460 return (uint8x16_t)__a;
14463 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14464 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14465 vreinterpretq_u8_f16 (float16x8_t __a)
14467 return (uint8x16_t) __a;
14469 #endif
14471 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14472 vreinterpretq_u8_f32 (float32x4_t __a)
14474 return (uint8x16_t)__a;
14477 #pragma GCC push_options
14478 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14479 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14480 vreinterpretq_u8_p64 (poly64x2_t __a)
14482 return (uint8x16_t)__a;
14485 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14486 vreinterpretq_u8_p128 (poly128_t __a)
14488 return (uint8x16_t)__a;
14491 #pragma GCC pop_options
14492 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14493 vreinterpretq_u8_s64 (int64x2_t __a)
14495 return (uint8x16_t)__a;
14498 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14499 vreinterpretq_u8_u64 (uint64x2_t __a)
14501 return (uint8x16_t)__a;
14504 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14505 vreinterpretq_u8_s8 (int8x16_t __a)
14507 return (uint8x16_t)__a;
14510 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14511 vreinterpretq_u8_s16 (int16x8_t __a)
14513 return (uint8x16_t)__a;
14516 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14517 vreinterpretq_u8_s32 (int32x4_t __a)
14519 return (uint8x16_t)__a;
14522 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14523 vreinterpretq_u8_u16 (uint16x8_t __a)
14525 return (uint8x16_t)__a;
14528 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14529 vreinterpretq_u8_u32 (uint32x4_t __a)
14531 return (uint8x16_t)__a;
14534 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14535 vreinterpretq_u16_p8 (poly8x16_t __a)
14537 return (uint16x8_t)__a;
14540 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14541 vreinterpretq_u16_p16 (poly16x8_t __a)
14543 return (uint16x8_t)__a;
14546 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14547 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14548 vreinterpretq_u16_f16 (float16x8_t __a)
14550 return (uint16x8_t) __a;
14552 #endif
14554 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14555 vreinterpretq_u16_f32 (float32x4_t __a)
14557 return (uint16x8_t)__a;
14560 #pragma GCC push_options
14561 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14562 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14563 vreinterpretq_u16_p64 (poly64x2_t __a)
14565 return (uint16x8_t)__a;
14568 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14569 vreinterpretq_u16_p128 (poly128_t __a)
14571 return (uint16x8_t)__a;
14574 #pragma GCC pop_options
14575 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14576 vreinterpretq_u16_s64 (int64x2_t __a)
14578 return (uint16x8_t)__a;
14581 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14582 vreinterpretq_u16_u64 (uint64x2_t __a)
14584 return (uint16x8_t)__a;
14587 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14588 vreinterpretq_u16_s8 (int8x16_t __a)
14590 return (uint16x8_t)__a;
14593 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14594 vreinterpretq_u16_s16 (int16x8_t __a)
14596 return (uint16x8_t)__a;
14599 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14600 vreinterpretq_u16_s32 (int32x4_t __a)
14602 return (uint16x8_t)__a;
14605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14606 vreinterpretq_u16_u8 (uint8x16_t __a)
14608 return (uint16x8_t)__a;
14611 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14612 vreinterpretq_u16_u32 (uint32x4_t __a)
14614 return (uint16x8_t)__a;
14617 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14618 vreinterpretq_u32_p8 (poly8x16_t __a)
14620 return (uint32x4_t)__a;
14623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14624 vreinterpretq_u32_p16 (poly16x8_t __a)
14626 return (uint32x4_t)__a;
14629 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
14630 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14631 vreinterpretq_u32_f16 (float16x8_t __a)
14633 return (uint32x4_t) __a;
14635 #endif
14637 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14638 vreinterpretq_u32_f32 (float32x4_t __a)
14640 return (uint32x4_t)__a;
14643 #pragma GCC push_options
14644 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14645 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14646 vreinterpretq_u32_p64 (poly64x2_t __a)
14648 return (uint32x4_t)__a;
14651 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14652 vreinterpretq_u32_p128 (poly128_t __a)
14654 return (uint32x4_t)__a;
14657 #pragma GCC pop_options
14658 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14659 vreinterpretq_u32_s64 (int64x2_t __a)
14661 return (uint32x4_t)__a;
14664 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14665 vreinterpretq_u32_u64 (uint64x2_t __a)
14667 return (uint32x4_t)__a;
14670 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14671 vreinterpretq_u32_s8 (int8x16_t __a)
14673 return (uint32x4_t)__a;
14676 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14677 vreinterpretq_u32_s16 (int16x8_t __a)
14679 return (uint32x4_t)__a;
14682 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14683 vreinterpretq_u32_s32 (int32x4_t __a)
14685 return (uint32x4_t)__a;
14688 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14689 vreinterpretq_u32_u8 (uint8x16_t __a)
14691 return (uint32x4_t)__a;
14694 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14695 vreinterpretq_u32_u16 (uint16x8_t __a)
14697 return (uint32x4_t)__a;
14701 #pragma GCC push_options
14702 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
14703 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14704 vldrq_p128 (poly128_t const * __ptr)
14706 #ifdef __ARM_BIG_ENDIAN
14707 poly64_t* __ptmp = (poly64_t*) __ptr;
14708 poly64_t __d0 = vld1_p64 (__ptmp);
14709 poly64_t __d1 = vld1_p64 (__ptmp + 1);
14710 return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0));
14711 #else
14712 return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr));
14713 #endif
14716 __extension__ static __inline void __attribute__ ((__always_inline__))
14717 vstrq_p128 (poly128_t * __ptr, poly128_t __val)
14719 #ifdef __ARM_BIG_ENDIAN
14720 poly64x2_t __tmp = vreinterpretq_p64_p128 (__val);
14721 poly64_t __d0 = vget_high_p64 (__tmp);
14722 poly64_t __d1 = vget_low_p64 (__tmp);
14723 vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1));
14724 #else
14725 vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val));
14726 #endif
/* The vceq_p64 intrinsic does not map to a single instruction.
   Instead we emulate it by performing a 32-bit variant of the vceq
   and applying a pairwise min reduction to the result.
   vceq_u32 will produce two 32-bit halves, each of which will contain either
   all ones or all zeros depending on whether the corresponding 32-bit
   halves of the poly64_t were equal.  The whole poly64_t values are equal
   if and only if both halves are equal, i.e. vceq_u32 returns all ones.
   If the result is all zeros for any half then the whole result is zeros.
   This is what the pairwise min reduction achieves.  */
14739 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14740 vceq_p64 (poly64x1_t __a, poly64x1_t __b)
14742 uint32x2_t __t_a = vreinterpret_u32_p64 (__a);
14743 uint32x2_t __t_b = vreinterpret_u32_p64 (__b);
14744 uint32x2_t __c = vceq_u32 (__t_a, __t_b);
14745 uint32x2_t __m = vpmin_u32 (__c, __c);
14746 return vreinterpret_u64_u32 (__m);
/* The vtst_p64 intrinsic does not map to a single instruction.
   We emulate it in a way similar to vceq_p64 above, but here we do
   a reduction with max, since if any two corresponding bits
   in the two poly64_t's are both set, then the whole result must be
   all ones.  */
14754 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14755 vtst_p64 (poly64x1_t __a, poly64x1_t __b)
14757 uint32x2_t __t_a = vreinterpret_u32_p64 (__a);
14758 uint32x2_t __t_b = vreinterpret_u32_p64 (__b);
14759 uint32x2_t __c = vtst_u32 (__t_a, __t_b);
14760 uint32x2_t __m = vpmax_u32 (__c, __c);
14761 return vreinterpret_u64_u32 (__m);
14764 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14765 vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
14767 return __builtin_arm_crypto_aese (__data, __key);
14770 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14771 vaesdq_u8 (uint8x16_t __data, uint8x16_t __key)
14773 return __builtin_arm_crypto_aesd (__data, __key);
14776 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14777 vaesmcq_u8 (uint8x16_t __data)
14779 return __builtin_arm_crypto_aesmc (__data);
14782 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14783 vaesimcq_u8 (uint8x16_t __data)
14785 return __builtin_arm_crypto_aesimc (__data);
14788 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14789 vsha1h_u32 (uint32_t __hash_e)
14791 uint32x4_t __t = vdupq_n_u32 (0);
14792 __t = vsetq_lane_u32 (__hash_e, __t, 0);
14793 __t = __builtin_arm_crypto_sha1h (__t);
14794 return vgetq_lane_u32 (__t, 0);
14797 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14798 vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
14800 uint32x4_t __t = vdupq_n_u32 (0);
14801 __t = vsetq_lane_u32 (__hash_e, __t, 0);
14802 return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk);
14805 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14806 vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
14808 uint32x4_t __t = vdupq_n_u32 (0);
14809 __t = vsetq_lane_u32 (__hash_e, __t, 0);
14810 return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk);
14813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14814 vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
14816 uint32x4_t __t = vdupq_n_u32 (0);
14817 __t = vsetq_lane_u32 (__hash_e, __t, 0);
14818 return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk);
14821 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14822 vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11)
14824 return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11);
14827 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14828 vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15)
14830 return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15);
14833 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14834 vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk)
14836 return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk);
14839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14840 vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk)
14842 return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk);
14845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14846 vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7)
14848 return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7);
14851 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14852 vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15)
14854 return __builtin_arm_crypto_sha256su1 (__tw0_3, __w8_11, __w12_15);
14857 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14858 vmull_p64 (poly64_t __a, poly64_t __b)
14860 return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b);
14863 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
14864 vmull_high_p64 (poly64x2_t __a, poly64x2_t __b)
14866 poly64_t __t1 = vget_high_p64 (__a);
14867 poly64_t __t2 = vget_high_p64 (__b);
14869 return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2);
14872 #pragma GCC pop_options
14874 /* Intrinsics for FP16 instructions. */
14875 #pragma GCC push_options
14876 #pragma GCC target ("fpu=neon-fp-armv8")
14877 #if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
14879 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14880 vabd_f16 (float16x4_t __a, float16x4_t __b)
14882 return __builtin_neon_vabdv4hf (__a, __b);
14885 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14886 vabdq_f16 (float16x8_t __a, float16x8_t __b)
14888 return __builtin_neon_vabdv8hf (__a, __b);
14891 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14892 vabs_f16 (float16x4_t __a)
14894 return __builtin_neon_vabsv4hf (__a);
14897 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14898 vabsq_f16 (float16x8_t __a)
14900 return __builtin_neon_vabsv8hf (__a);
14903 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14904 vadd_f16 (float16x4_t __a, float16x4_t __b)
14906 return __builtin_neon_vaddv4hf (__a, __b);
14909 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14910 vaddq_f16 (float16x8_t __a, float16x8_t __b)
14912 return __builtin_neon_vaddv8hf (__a, __b);
14915 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14916 vcage_f16 (float16x4_t __a, float16x4_t __b)
14918 return (uint16x4_t)__builtin_neon_vcagev4hf (__a, __b);
14921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14922 vcageq_f16 (float16x8_t __a, float16x8_t __b)
14924 return (uint16x8_t)__builtin_neon_vcagev8hf (__a, __b);
14927 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14928 vcagt_f16 (float16x4_t __a, float16x4_t __b)
14930 return (uint16x4_t)__builtin_neon_vcagtv4hf (__a, __b);
14933 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14934 vcagtq_f16 (float16x8_t __a, float16x8_t __b)
14936 return (uint16x8_t)__builtin_neon_vcagtv8hf (__a, __b);
14939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14940 vcale_f16 (float16x4_t __a, float16x4_t __b)
14942 return (uint16x4_t)__builtin_neon_vcalev4hf (__a, __b);
14945 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14946 vcaleq_f16 (float16x8_t __a, float16x8_t __b)
14948 return (uint16x8_t)__builtin_neon_vcalev8hf (__a, __b);
14951 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14952 vcalt_f16 (float16x4_t __a, float16x4_t __b)
14954 return (uint16x4_t)__builtin_neon_vcaltv4hf (__a, __b);
14957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14958 vcaltq_f16 (float16x8_t __a, float16x8_t __b)
14960 return (uint16x8_t)__builtin_neon_vcaltv8hf (__a, __b);
14963 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14964 vceq_f16 (float16x4_t __a, float16x4_t __b)
14966 return (uint16x4_t)__builtin_neon_vceqv4hf (__a, __b);
14969 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14970 vceqq_f16 (float16x8_t __a, float16x8_t __b)
14972 return (uint16x8_t)__builtin_neon_vceqv8hf (__a, __b);
14975 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14976 vceqz_f16 (float16x4_t __a)
14978 return (uint16x4_t)__builtin_neon_vceqzv4hf (__a);
14981 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14982 vceqzq_f16 (float16x8_t __a)
14984 return (uint16x8_t)__builtin_neon_vceqzv8hf (__a);
14987 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14988 vcge_f16 (float16x4_t __a, float16x4_t __b)
14990 return (uint16x4_t)__builtin_neon_vcgev4hf (__a, __b);
14993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14994 vcgeq_f16 (float16x8_t __a, float16x8_t __b)
14996 return (uint16x8_t)__builtin_neon_vcgev8hf (__a, __b);
14999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15000 vcgez_f16 (float16x4_t __a)
15002 return (uint16x4_t)__builtin_neon_vcgezv4hf (__a);
15005 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15006 vcgezq_f16 (float16x8_t __a)
15008 return (uint16x8_t)__builtin_neon_vcgezv8hf (__a);
15011 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15012 vcgt_f16 (float16x4_t __a, float16x4_t __b)
15014 return (uint16x4_t)__builtin_neon_vcgtv4hf (__a, __b);
15017 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15018 vcgtq_f16 (float16x8_t __a, float16x8_t __b)
15020 return (uint16x8_t)__builtin_neon_vcgtv8hf (__a, __b);
15023 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15024 vcgtz_f16 (float16x4_t __a)
15026 return (uint16x4_t)__builtin_neon_vcgtzv4hf (__a);
15029 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15030 vcgtzq_f16 (float16x8_t __a)
15032 return (uint16x8_t)__builtin_neon_vcgtzv8hf (__a);
15035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15036 vcle_f16 (float16x4_t __a, float16x4_t __b)
15038 return (uint16x4_t)__builtin_neon_vclev4hf (__a, __b);
15041 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15042 vcleq_f16 (float16x8_t __a, float16x8_t __b)
15044 return (uint16x8_t)__builtin_neon_vclev8hf (__a, __b);
15047 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15048 vclez_f16 (float16x4_t __a)
15050 return (uint16x4_t)__builtin_neon_vclezv4hf (__a);
15053 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15054 vclezq_f16 (float16x8_t __a)
15056 return (uint16x8_t)__builtin_neon_vclezv8hf (__a);
15059 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15060 vclt_f16 (float16x4_t __a, float16x4_t __b)
15062 return (uint16x4_t)__builtin_neon_vcltv4hf (__a, __b);
15065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15066 vcltq_f16 (float16x8_t __a, float16x8_t __b)
15068 return (uint16x8_t)__builtin_neon_vcltv8hf (__a, __b);
15071 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15072 vcltz_f16 (float16x4_t __a)
15074 return (uint16x4_t)__builtin_neon_vcltzv4hf (__a);
15077 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15078 vcltzq_f16 (float16x8_t __a)
15080 return (uint16x8_t)__builtin_neon_vcltzv8hf (__a);
15083 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15084 vcvt_f16_s16 (int16x4_t __a)
15086 return (float16x4_t)__builtin_neon_vcvtsv4hi (__a);
15089 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15090 vcvt_f16_u16 (uint16x4_t __a)
15092 return (float16x4_t)__builtin_neon_vcvtuv4hi ((int16x4_t)__a);
15095 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15096 vcvt_s16_f16 (float16x4_t __a)
15098 return (int16x4_t)__builtin_neon_vcvtsv4hf (__a);
15101 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15102 vcvt_u16_f16 (float16x4_t __a)
15104 return (uint16x4_t)__builtin_neon_vcvtuv4hf (__a);
15107 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15108 vcvtq_f16_s16 (int16x8_t __a)
15110 return (float16x8_t)__builtin_neon_vcvtsv8hi (__a);
15113 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15114 vcvtq_f16_u16 (uint16x8_t __a)
15116 return (float16x8_t)__builtin_neon_vcvtuv8hi ((int16x8_t)__a);
15119 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15120 vcvtq_s16_f16 (float16x8_t __a)
15122 return (int16x8_t)__builtin_neon_vcvtsv8hf (__a);
15125 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15126 vcvtq_u16_f16 (float16x8_t __a)
15128 return (uint16x8_t)__builtin_neon_vcvtuv8hf (__a);
15131 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15132 vcvta_s16_f16 (float16x4_t __a)
15134 return __builtin_neon_vcvtasv4hf (__a);
15137 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15138 vcvta_u16_f16 (float16x4_t __a)
15140 return (uint16x4_t)__builtin_neon_vcvtauv4hf (__a);
15143 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15144 vcvtaq_s16_f16 (float16x8_t __a)
15146 return __builtin_neon_vcvtasv8hf (__a);
15149 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15150 vcvtaq_u16_f16 (float16x8_t __a)
15152 return (uint16x8_t)__builtin_neon_vcvtauv8hf (__a);
15155 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15156 vcvtm_s16_f16 (float16x4_t __a)
15158 return __builtin_neon_vcvtmsv4hf (__a);
15161 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15162 vcvtm_u16_f16 (float16x4_t __a)
15164 return (uint16x4_t)__builtin_neon_vcvtmuv4hf (__a);
15167 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15168 vcvtmq_s16_f16 (float16x8_t __a)
15170 return __builtin_neon_vcvtmsv8hf (__a);
15173 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15174 vcvtmq_u16_f16 (float16x8_t __a)
15176 return (uint16x8_t)__builtin_neon_vcvtmuv8hf (__a);
15179 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15180 vcvtn_s16_f16 (float16x4_t __a)
15182 return __builtin_neon_vcvtnsv4hf (__a);
15185 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15186 vcvtn_u16_f16 (float16x4_t __a)
15188 return (uint16x4_t)__builtin_neon_vcvtnuv4hf (__a);
15191 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15192 vcvtnq_s16_f16 (float16x8_t __a)
15194 return __builtin_neon_vcvtnsv8hf (__a);
15197 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15198 vcvtnq_u16_f16 (float16x8_t __a)
15200 return (uint16x8_t)__builtin_neon_vcvtnuv8hf (__a);
15203 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15204 vcvtp_s16_f16 (float16x4_t __a)
15206 return __builtin_neon_vcvtpsv4hf (__a);
15209 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15210 vcvtp_u16_f16 (float16x4_t __a)
15212 return (uint16x4_t)__builtin_neon_vcvtpuv4hf (__a);
15215 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15216 vcvtpq_s16_f16 (float16x8_t __a)
15218 return __builtin_neon_vcvtpsv8hf (__a);
15221 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15222 vcvtpq_u16_f16 (float16x8_t __a)
15224 return (uint16x8_t)__builtin_neon_vcvtpuv8hf (__a);
15227 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15228 vcvt_n_f16_s16 (int16x4_t __a, const int __b)
15230 return __builtin_neon_vcvts_nv4hi (__a, __b);
15233 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15234 vcvt_n_f16_u16 (uint16x4_t __a, const int __b)
15236 return __builtin_neon_vcvtu_nv4hi ((int16x4_t)__a, __b);
15239 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15240 vcvtq_n_f16_s16 (int16x8_t __a, const int __b)
15242 return __builtin_neon_vcvts_nv8hi (__a, __b);
15245 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15246 vcvtq_n_f16_u16 (uint16x8_t __a, const int __b)
15248 return __builtin_neon_vcvtu_nv8hi ((int16x8_t)__a, __b);
15251 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15252 vcvt_n_s16_f16 (float16x4_t __a, const int __b)
15254 return __builtin_neon_vcvts_nv4hf (__a, __b);
15257 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15258 vcvt_n_u16_f16 (float16x4_t __a, const int __b)
15260 return (uint16x4_t)__builtin_neon_vcvtu_nv4hf (__a, __b);
15263 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15264 vcvtq_n_s16_f16 (float16x8_t __a, const int __b)
15266 return __builtin_neon_vcvts_nv8hf (__a, __b);
15269 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15270 vcvtq_n_u16_f16 (float16x8_t __a, const int __b)
15272 return (uint16x8_t)__builtin_neon_vcvtu_nv8hf (__a, __b);
15275 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15276 vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
15278 return __builtin_neon_vfmav4hf (__a, __b, __c);
15281 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15282 vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
15284 return __builtin_neon_vfmav8hf (__a, __b, __c);
15287 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15288 vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
15290 return __builtin_neon_vfmsv4hf (__a, __b, __c);
15293 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15294 vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
15296 return __builtin_neon_vfmsv8hf (__a, __b, __c);
15299 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15300 vmax_f16 (float16x4_t __a, float16x4_t __b)
15302 return __builtin_neon_vmaxfv4hf (__a, __b);
15305 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15306 vmaxq_f16 (float16x8_t __a, float16x8_t __b)
15308 return __builtin_neon_vmaxfv8hf (__a, __b);
15311 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15312 vmaxnm_f16 (float16x4_t __a, float16x4_t __b)
15314 return __builtin_neon_vmaxnmv4hf (__a, __b);
15317 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15318 vmaxnmq_f16 (float16x8_t __a, float16x8_t __b)
15320 return __builtin_neon_vmaxnmv8hf (__a, __b);
15323 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15324 vmin_f16 (float16x4_t __a, float16x4_t __b)
15326 return __builtin_neon_vminfv4hf (__a, __b);
15329 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15330 vminq_f16 (float16x8_t __a, float16x8_t __b)
15332 return __builtin_neon_vminfv8hf (__a, __b);
15335 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15336 vminnm_f16 (float16x4_t __a, float16x4_t __b)
15338 return __builtin_neon_vminnmv4hf (__a, __b);
15341 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15342 vminnmq_f16 (float16x8_t __a, float16x8_t __b)
15344 return __builtin_neon_vminnmv8hf (__a, __b);
15347 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15348 vmul_f16 (float16x4_t __a, float16x4_t __b)
15350 return __builtin_neon_vmulfv4hf (__a, __b);
15353 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15354 vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __c)
15356 return __builtin_neon_vmul_lanev4hf (__a, __b, __c);
15359 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15360 vmul_n_f16 (float16x4_t __a, float16_t __b)
15362 return __builtin_neon_vmul_nv4hf (__a, __b);
15365 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15366 vmulq_f16 (float16x8_t __a, float16x8_t __b)
15368 return __builtin_neon_vmulfv8hf (__a, __b);
15371 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15372 vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __c)
15374 return __builtin_neon_vmul_lanev8hf (__a, __b, __c);
15377 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15378 vmulq_n_f16 (float16x8_t __a, float16_t __b)
15380 return __builtin_neon_vmul_nv8hf (__a, __b);
15383 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15384 vneg_f16 (float16x4_t __a)
15386 return __builtin_neon_vnegv4hf (__a);
15389 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15390 vnegq_f16 (float16x8_t __a)
15392 return __builtin_neon_vnegv8hf (__a);
15395 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15396 vpadd_f16 (float16x4_t __a, float16x4_t __b)
15398 return __builtin_neon_vpaddv4hf (__a, __b);
15401 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15402 vpmax_f16 (float16x4_t __a, float16x4_t __b)
15404 return __builtin_neon_vpmaxfv4hf (__a, __b);
15407 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15408 vpmin_f16 (float16x4_t __a, float16x4_t __b)
15410 return __builtin_neon_vpminfv4hf (__a, __b);
15413 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15414 vrecpe_f16 (float16x4_t __a)
15416 return __builtin_neon_vrecpev4hf (__a);
15419 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15420 vrecpeq_f16 (float16x8_t __a)
15422 return __builtin_neon_vrecpev8hf (__a);
15425 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15426 vrnd_f16 (float16x4_t __a)
15428 return __builtin_neon_vrndv4hf (__a);
15431 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15432 vrndq_f16 (float16x8_t __a)
15434 return __builtin_neon_vrndv8hf (__a);
15437 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15438 vrnda_f16 (float16x4_t __a)
15440 return __builtin_neon_vrndav4hf (__a);
15443 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15444 vrndaq_f16 (float16x8_t __a)
15446 return __builtin_neon_vrndav8hf (__a);
15449 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15450 vrndm_f16 (float16x4_t __a)
15452 return __builtin_neon_vrndmv4hf (__a);
15455 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15456 vrndmq_f16 (float16x8_t __a)
15458 return __builtin_neon_vrndmv8hf (__a);
15461 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15462 vrndn_f16 (float16x4_t __a)
15464 return __builtin_neon_vrndnv4hf (__a);
15467 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15468 vrndnq_f16 (float16x8_t __a)
15470 return __builtin_neon_vrndnv8hf (__a);
15473 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15474 vrndp_f16 (float16x4_t __a)
15476 return __builtin_neon_vrndpv4hf (__a);
15479 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15480 vrndpq_f16 (float16x8_t __a)
15482 return __builtin_neon_vrndpv8hf (__a);
15485 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15486 vrndx_f16 (float16x4_t __a)
15488 return __builtin_neon_vrndxv4hf (__a);
15491 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15492 vrndxq_f16 (float16x8_t __a)
15494 return __builtin_neon_vrndxv8hf (__a);
15497 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15498 vrsqrte_f16 (float16x4_t __a)
15500 return __builtin_neon_vrsqrtev4hf (__a);
15503 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15504 vrsqrteq_f16 (float16x8_t __a)
15506 return __builtin_neon_vrsqrtev8hf (__a);
15509 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15510 vrecps_f16 (float16x4_t __a, float16x4_t __b)
15512 return __builtin_neon_vrecpsv4hf (__a, __b);
15515 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15516 vrecpsq_f16 (float16x8_t __a, float16x8_t __b)
15518 return __builtin_neon_vrecpsv8hf (__a, __b);
15521 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15522 vrsqrts_f16 (float16x4_t __a, float16x4_t __b)
15524 return __builtin_neon_vrsqrtsv4hf (__a, __b);
15527 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15528 vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b)
15530 return __builtin_neon_vrsqrtsv8hf (__a, __b);
15533 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15534 vsub_f16 (float16x4_t __a, float16x4_t __b)
15536 return __builtin_neon_vsubv4hf (__a, __b);
15539 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15540 vsubq_f16 (float16x8_t __a, float16x8_t __b)
15542 return __builtin_neon_vsubv8hf (__a, __b);
15545 #endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */
15546 #pragma GCC pop_options
15548 /* Half-precision data processing intrinsics. */
15549 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
15551 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15552 vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
15554 return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c);
15557 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15558 vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
15560 return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c);
15563 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15564 vdup_n_f16 (float16_t __a)
15566 return __builtin_neon_vdup_nv4hf (__a);
15569 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15570 vdupq_n_f16 (float16_t __a)
15572 return __builtin_neon_vdup_nv8hf (__a);
15575 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15576 vdup_lane_f16 (float16x4_t __a, const int __b)
15578 return __builtin_neon_vdup_lanev4hf (__a, __b);
15581 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15582 vdupq_lane_f16 (float16x4_t __a, const int __b)
15584 return __builtin_neon_vdup_lanev8hf (__a, __b);
15587 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15588 vext_f16 (float16x4_t __a, float16x4_t __b, const int __c)
15590 return __builtin_neon_vextv4hf (__a, __b, __c);
15593 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15594 vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c)
15596 return __builtin_neon_vextv8hf (__a, __b, __c);
15599 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15600 vmov_n_f16 (float16_t __a)
15602 return __builtin_neon_vdup_nv4hf (__a);
15605 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15606 vmovq_n_f16 (float16_t __a)
15608 return __builtin_neon_vdup_nv8hf (__a);
15611 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
15612 vrev64_f16 (float16x4_t __a)
15614 return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 });
15617 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
15618 vrev64q_f16 (float16x8_t __a)
15620 return
15621 (float16x8_t)__builtin_shuffle (__a,
15622 (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 });
15625 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
15626 vtrn_f16 (float16x4_t __a, float16x4_t __b)
15628 float16x4x2_t __rv;
15629 #ifdef __ARM_BIG_ENDIAN
15630 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 });
15631 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 });
15632 #else
15633 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 });
15634 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 });
15635 #endif
15636 return __rv;
15639 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
15640 vtrnq_f16 (float16x8_t __a, float16x8_t __b)
15642 float16x8x2_t __rv;
15643 #ifdef __ARM_BIG_ENDIAN
15644 __rv.val[0] = __builtin_shuffle (__a, __b,
15645 (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 });
15646 __rv.val[1] = __builtin_shuffle (__a, __b,
15647 (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 });
15648 #else
15649 __rv.val[0] = __builtin_shuffle (__a, __b,
15650 (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 });
15651 __rv.val[1] = __builtin_shuffle (__a, __b,
15652 (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 });
15653 #endif
15654 return __rv;
15657 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
15658 vuzp_f16 (float16x4_t __a, float16x4_t __b)
15660 float16x4x2_t __rv;
15661 #ifdef __ARM_BIG_ENDIAN
15662 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 });
15663 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 });
15664 #else
15665 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 });
15666 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 });
15667 #endif
15668 return __rv;
15671 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
15672 vuzpq_f16 (float16x8_t __a, float16x8_t __b)
15674 float16x8x2_t __rv;
15675 #ifdef __ARM_BIG_ENDIAN
15676 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
15677 { 5, 7, 1, 3, 13, 15, 9, 11 });
15678 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
15679 { 4, 6, 0, 2, 12, 14, 8, 10 });
15680 #else
15681 __rv.val[0] = __builtin_shuffle (__a, __b,
15682 (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 });
15683 __rv.val[1] = __builtin_shuffle (__a, __b,
15684 (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 });
15685 #endif
15686 return __rv;
15689 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
15690 vzip_f16 (float16x4_t __a, float16x4_t __b)
15692 float16x4x2_t __rv;
15693 #ifdef __ARM_BIG_ENDIAN
15694 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 });
15695 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 });
15696 #else
15697 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 });
15698 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 });
15699 #endif
15700 return __rv;
15703 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
15704 vzipq_f16 (float16x8_t __a, float16x8_t __b)
15706 float16x8x2_t __rv;
15707 #ifdef __ARM_BIG_ENDIAN
15708 __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
15709 { 10, 2, 11, 3, 8, 0, 9, 1 });
15710 __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
15711 { 14, 6, 15, 7, 12, 4, 13, 5 });
15712 #else
15713 __rv.val[0] = __builtin_shuffle (__a, __b,
15714 (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 });
15715 __rv.val[1] = __builtin_shuffle (__a, __b,
15716 (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 });
15717 #endif
15718 return __rv;
15721 #endif
15723 #ifdef __cplusplus
15725 #endif
15727 #pragma GCC pop_options
15729 #endif
15730 #endif