[AArch64] Error out of arm_neon.h if nofp/nosimd
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blobc679802e9e58244b50ec6c05961701b9b3f5d7f4
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #ifndef __ARM_NEON
31 #error You must enable AdvancedSIMD instructions to use arm_neon.h
32 #else
34 #include <stdint.h>
36 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
37 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
39 typedef __Int8x8_t int8x8_t;
40 typedef __Int16x4_t int16x4_t;
41 typedef __Int32x2_t int32x2_t;
42 typedef __Int64x1_t int64x1_t;
43 typedef __Float32x2_t float32x2_t;
44 typedef __Poly8x8_t poly8x8_t;
45 typedef __Poly16x4_t poly16x4_t;
46 typedef __Uint8x8_t uint8x8_t;
47 typedef __Uint16x4_t uint16x4_t;
48 typedef __Uint32x2_t uint32x2_t;
49 typedef __Float64x1_t float64x1_t;
50 typedef __Uint64x1_t uint64x1_t;
51 typedef __Int8x16_t int8x16_t;
52 typedef __Int16x8_t int16x8_t;
53 typedef __Int32x4_t int32x4_t;
54 typedef __Int64x2_t int64x2_t;
55 typedef __Float32x4_t float32x4_t;
56 typedef __Float64x2_t float64x2_t;
57 typedef __Poly8x16_t poly8x16_t;
58 typedef __Poly16x8_t poly16x8_t;
59 typedef __Poly64x2_t poly64x2_t;
60 typedef __Uint8x16_t uint8x16_t;
61 typedef __Uint16x8_t uint16x8_t;
62 typedef __Uint32x4_t uint32x4_t;
63 typedef __Uint64x2_t uint64x2_t;
65 typedef __Poly8_t poly8_t;
66 typedef __Poly16_t poly16_t;
67 typedef __Poly64_t poly64_t;
68 typedef __Poly128_t poly128_t;
70 typedef float float32_t;
71 typedef double float64_t;
73 typedef struct int8x8x2_t
75 int8x8_t val[2];
76 } int8x8x2_t;
78 typedef struct int8x16x2_t
80 int8x16_t val[2];
81 } int8x16x2_t;
83 typedef struct int16x4x2_t
85 int16x4_t val[2];
86 } int16x4x2_t;
88 typedef struct int16x8x2_t
90 int16x8_t val[2];
91 } int16x8x2_t;
93 typedef struct int32x2x2_t
95 int32x2_t val[2];
96 } int32x2x2_t;
98 typedef struct int32x4x2_t
100 int32x4_t val[2];
101 } int32x4x2_t;
103 typedef struct int64x1x2_t
105 int64x1_t val[2];
106 } int64x1x2_t;
108 typedef struct int64x2x2_t
110 int64x2_t val[2];
111 } int64x2x2_t;
113 typedef struct uint8x8x2_t
115 uint8x8_t val[2];
116 } uint8x8x2_t;
118 typedef struct uint8x16x2_t
120 uint8x16_t val[2];
121 } uint8x16x2_t;
123 typedef struct uint16x4x2_t
125 uint16x4_t val[2];
126 } uint16x4x2_t;
128 typedef struct uint16x8x2_t
130 uint16x8_t val[2];
131 } uint16x8x2_t;
133 typedef struct uint32x2x2_t
135 uint32x2_t val[2];
136 } uint32x2x2_t;
138 typedef struct uint32x4x2_t
140 uint32x4_t val[2];
141 } uint32x4x2_t;
143 typedef struct uint64x1x2_t
145 uint64x1_t val[2];
146 } uint64x1x2_t;
148 typedef struct uint64x2x2_t
150 uint64x2_t val[2];
151 } uint64x2x2_t;
153 typedef struct float32x2x2_t
155 float32x2_t val[2];
156 } float32x2x2_t;
158 typedef struct float32x4x2_t
160 float32x4_t val[2];
161 } float32x4x2_t;
163 typedef struct float64x2x2_t
165 float64x2_t val[2];
166 } float64x2x2_t;
168 typedef struct float64x1x2_t
170 float64x1_t val[2];
171 } float64x1x2_t;
173 typedef struct poly8x8x2_t
175 poly8x8_t val[2];
176 } poly8x8x2_t;
178 typedef struct poly8x16x2_t
180 poly8x16_t val[2];
181 } poly8x16x2_t;
183 typedef struct poly16x4x2_t
185 poly16x4_t val[2];
186 } poly16x4x2_t;
188 typedef struct poly16x8x2_t
190 poly16x8_t val[2];
191 } poly16x8x2_t;
193 typedef struct int8x8x3_t
195 int8x8_t val[3];
196 } int8x8x3_t;
198 typedef struct int8x16x3_t
200 int8x16_t val[3];
201 } int8x16x3_t;
203 typedef struct int16x4x3_t
205 int16x4_t val[3];
206 } int16x4x3_t;
208 typedef struct int16x8x3_t
210 int16x8_t val[3];
211 } int16x8x3_t;
213 typedef struct int32x2x3_t
215 int32x2_t val[3];
216 } int32x2x3_t;
218 typedef struct int32x4x3_t
220 int32x4_t val[3];
221 } int32x4x3_t;
223 typedef struct int64x1x3_t
225 int64x1_t val[3];
226 } int64x1x3_t;
228 typedef struct int64x2x3_t
230 int64x2_t val[3];
231 } int64x2x3_t;
233 typedef struct uint8x8x3_t
235 uint8x8_t val[3];
236 } uint8x8x3_t;
238 typedef struct uint8x16x3_t
240 uint8x16_t val[3];
241 } uint8x16x3_t;
243 typedef struct uint16x4x3_t
245 uint16x4_t val[3];
246 } uint16x4x3_t;
248 typedef struct uint16x8x3_t
250 uint16x8_t val[3];
251 } uint16x8x3_t;
253 typedef struct uint32x2x3_t
255 uint32x2_t val[3];
256 } uint32x2x3_t;
258 typedef struct uint32x4x3_t
260 uint32x4_t val[3];
261 } uint32x4x3_t;
263 typedef struct uint64x1x3_t
265 uint64x1_t val[3];
266 } uint64x1x3_t;
268 typedef struct uint64x2x3_t
270 uint64x2_t val[3];
271 } uint64x2x3_t;
273 typedef struct float32x2x3_t
275 float32x2_t val[3];
276 } float32x2x3_t;
278 typedef struct float32x4x3_t
280 float32x4_t val[3];
281 } float32x4x3_t;
283 typedef struct float64x2x3_t
285 float64x2_t val[3];
286 } float64x2x3_t;
288 typedef struct float64x1x3_t
290 float64x1_t val[3];
291 } float64x1x3_t;
293 typedef struct poly8x8x3_t
295 poly8x8_t val[3];
296 } poly8x8x3_t;
298 typedef struct poly8x16x3_t
300 poly8x16_t val[3];
301 } poly8x16x3_t;
303 typedef struct poly16x4x3_t
305 poly16x4_t val[3];
306 } poly16x4x3_t;
308 typedef struct poly16x8x3_t
310 poly16x8_t val[3];
311 } poly16x8x3_t;
313 typedef struct int8x8x4_t
315 int8x8_t val[4];
316 } int8x8x4_t;
318 typedef struct int8x16x4_t
320 int8x16_t val[4];
321 } int8x16x4_t;
323 typedef struct int16x4x4_t
325 int16x4_t val[4];
326 } int16x4x4_t;
328 typedef struct int16x8x4_t
330 int16x8_t val[4];
331 } int16x8x4_t;
333 typedef struct int32x2x4_t
335 int32x2_t val[4];
336 } int32x2x4_t;
338 typedef struct int32x4x4_t
340 int32x4_t val[4];
341 } int32x4x4_t;
343 typedef struct int64x1x4_t
345 int64x1_t val[4];
346 } int64x1x4_t;
348 typedef struct int64x2x4_t
350 int64x2_t val[4];
351 } int64x2x4_t;
353 typedef struct uint8x8x4_t
355 uint8x8_t val[4];
356 } uint8x8x4_t;
358 typedef struct uint8x16x4_t
360 uint8x16_t val[4];
361 } uint8x16x4_t;
363 typedef struct uint16x4x4_t
365 uint16x4_t val[4];
366 } uint16x4x4_t;
368 typedef struct uint16x8x4_t
370 uint16x8_t val[4];
371 } uint16x8x4_t;
373 typedef struct uint32x2x4_t
375 uint32x2_t val[4];
376 } uint32x2x4_t;
378 typedef struct uint32x4x4_t
380 uint32x4_t val[4];
381 } uint32x4x4_t;
383 typedef struct uint64x1x4_t
385 uint64x1_t val[4];
386 } uint64x1x4_t;
388 typedef struct uint64x2x4_t
390 uint64x2_t val[4];
391 } uint64x2x4_t;
393 typedef struct float32x2x4_t
395 float32x2_t val[4];
396 } float32x2x4_t;
398 typedef struct float32x4x4_t
400 float32x4_t val[4];
401 } float32x4x4_t;
403 typedef struct float64x2x4_t
405 float64x2_t val[4];
406 } float64x2x4_t;
408 typedef struct float64x1x4_t
410 float64x1_t val[4];
411 } float64x1x4_t;
413 typedef struct poly8x8x4_t
415 poly8x8_t val[4];
416 } poly8x8x4_t;
418 typedef struct poly8x16x4_t
420 poly8x16_t val[4];
421 } poly8x16x4_t;
423 typedef struct poly16x4x4_t
425 poly16x4_t val[4];
426 } poly16x4x4_t;
428 typedef struct poly16x8x4_t
430 poly16x8_t val[4];
431 } poly16x8x4_t;
/* __aarch64_vdup_lane internal macros.  __aarch64_vdup_lane_any
   extracts lane __b of vector __a and broadcasts it via the
   appropriately-typed vdup{__q}_n_{__size} intrinsic; the per-type
   macros below just fix the element-type suffix.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , __a, __b)
/* __aarch64_vdup_laneq internal macros: lane taken from a 128-bit
   vector, result is a 64-bit vector (empty __q argument).  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , __a, __b)
/* __aarch64_vdupq_lane internal macros: lane taken from a 64-bit
   vector, result is a 128-bit vector ('q' __q argument).  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, __a, __b)
/* __aarch64_vdupq_laneq internal macros: lane taken from a 128-bit
   vector, result is a 128-bit vector.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, __a, __b)
/* Internal macros for lane indices.  __AARCH64_LANE_CHECK issues a
   compile-time diagnostic (via a target builtin) when __idx is out of
   range for the vector's lane count.  */

#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
#define __AARCH64_LANE_CHECK(__vec, __idx) \
  __builtin_aarch64_im_lane_boundsi (__AARCH64_NUM_LANES (__vec), __idx)

/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif
/* vget_lane internal macro.  Bounds-checks __index, then reads the
   (endian-corrected) lane as a GNU statement expression.  */
#define __aarch64_vget_lane_any(__vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)]; \
  })
/* vset_lane and vld1_lane internal macro.  Bounds-checks __index,
   stores __elem into the (endian-corrected) lane, and yields the
   updated vector as the value of the statement expression.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)] = __elem; \
    __vec; \
  })
571 /* vadd */
572 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
573 vadd_s8 (int8x8_t __a, int8x8_t __b)
575 return __a + __b;
578 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
579 vadd_s16 (int16x4_t __a, int16x4_t __b)
581 return __a + __b;
584 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
585 vadd_s32 (int32x2_t __a, int32x2_t __b)
587 return __a + __b;
590 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
591 vadd_f32 (float32x2_t __a, float32x2_t __b)
593 return __a + __b;
596 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
597 vadd_f64 (float64x1_t __a, float64x1_t __b)
599 return __a + __b;
602 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
603 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
605 return __a + __b;
608 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
609 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
611 return __a + __b;
614 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
615 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
617 return __a + __b;
620 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
621 vadd_s64 (int64x1_t __a, int64x1_t __b)
623 return __a + __b;
626 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
627 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
629 return __a + __b;
632 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
633 vaddq_s8 (int8x16_t __a, int8x16_t __b)
635 return __a + __b;
638 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
639 vaddq_s16 (int16x8_t __a, int16x8_t __b)
641 return __a + __b;
644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
645 vaddq_s32 (int32x4_t __a, int32x4_t __b)
647 return __a + __b;
650 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
651 vaddq_s64 (int64x2_t __a, int64x2_t __b)
653 return __a + __b;
656 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
657 vaddq_f32 (float32x4_t __a, float32x4_t __b)
659 return __a + __b;
662 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
663 vaddq_f64 (float64x2_t __a, float64x2_t __b)
665 return __a + __b;
668 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
669 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
671 return __a + __b;
674 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
675 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
677 return __a + __b;
680 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
681 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
683 return __a + __b;
686 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
687 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
689 return __a + __b;
692 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
693 vaddl_s8 (int8x8_t __a, int8x8_t __b)
695 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
698 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
699 vaddl_s16 (int16x4_t __a, int16x4_t __b)
701 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
704 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
705 vaddl_s32 (int32x2_t __a, int32x2_t __b)
707 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
710 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
711 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
713 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
714 (int8x8_t) __b);
717 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
718 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
720 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
721 (int16x4_t) __b);
724 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
725 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
727 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
728 (int32x2_t) __b);
731 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
732 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
734 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
737 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
738 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
740 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
743 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
744 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
746 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
749 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
750 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
752 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
753 (int8x16_t) __b);
756 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
757 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
759 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
760 (int16x8_t) __b);
763 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
764 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
766 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
767 (int32x4_t) __b);
770 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
771 vaddw_s8 (int16x8_t __a, int8x8_t __b)
773 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
776 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
777 vaddw_s16 (int32x4_t __a, int16x4_t __b)
779 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
782 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
783 vaddw_s32 (int64x2_t __a, int32x2_t __b)
785 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
788 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
789 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
791 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
792 (int8x8_t) __b);
795 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
796 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
798 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
799 (int16x4_t) __b);
802 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
803 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
805 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
806 (int32x2_t) __b);
809 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
810 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
812 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
815 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
816 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
818 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
821 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
822 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
824 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
827 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
828 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
830 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
831 (int8x16_t) __b);
834 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
835 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
837 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
838 (int16x8_t) __b);
841 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
842 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
844 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
845 (int32x4_t) __b);
848 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
849 vhadd_s8 (int8x8_t __a, int8x8_t __b)
851 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
854 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
855 vhadd_s16 (int16x4_t __a, int16x4_t __b)
857 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
860 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
861 vhadd_s32 (int32x2_t __a, int32x2_t __b)
863 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
866 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
867 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
869 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
870 (int8x8_t) __b);
873 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
874 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
876 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
877 (int16x4_t) __b);
880 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
881 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
883 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
884 (int32x2_t) __b);
887 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
888 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
890 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
893 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
894 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
896 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
899 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
900 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
902 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
905 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
906 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
908 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
909 (int8x16_t) __b);
912 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
913 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
915 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
916 (int16x8_t) __b);
919 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
920 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
922 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
923 (int32x4_t) __b);
926 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
927 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
929 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
932 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
933 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
935 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
938 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
939 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
941 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
944 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
945 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
947 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
948 (int8x8_t) __b);
951 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
952 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
954 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
955 (int16x4_t) __b);
958 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
959 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
961 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
962 (int32x2_t) __b);
965 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
966 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
968 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
971 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
972 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
974 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
977 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
978 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
980 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
983 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
984 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
986 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
987 (int8x16_t) __b);
990 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
991 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
993 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
994 (int16x8_t) __b);
997 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
998 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1000 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1001 (int32x4_t) __b);
1004 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1005 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1007 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1010 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1011 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1013 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1016 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1017 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1019 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1022 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1023 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1025 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1026 (int16x8_t) __b);
1029 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1030 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1032 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1033 (int32x4_t) __b);
1036 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1037 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1039 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1040 (int64x2_t) __b);
1043 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1044 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1046 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1049 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1050 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1052 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1055 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1056 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1058 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1061 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1062 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1064 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1065 (int16x8_t) __b);
1068 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1069 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1071 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1072 (int32x4_t) __b);
1075 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1076 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1078 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1079 (int64x2_t) __b);
1082 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1083 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1085 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1088 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1089 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1091 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1094 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1095 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1097 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1100 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1101 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1103 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1104 (int16x8_t) __b,
1105 (int16x8_t) __c);
1108 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1109 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1111 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1112 (int32x4_t) __b,
1113 (int32x4_t) __c);
1116 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1117 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1119 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1120 (int64x2_t) __b,
1121 (int64x2_t) __c);
1124 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1125 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1127 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1130 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1131 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1133 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1136 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1137 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1139 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1142 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1143 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1145 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1146 (int16x8_t) __b,
1147 (int16x8_t) __c);
1150 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1151 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1153 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1154 (int32x4_t) __b,
1155 (int32x4_t) __c);
1158 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1159 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1161 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1162 (int64x2_t) __b,
1163 (int64x2_t) __c);
1166 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1167 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1169 return __a / __b;
1172 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1173 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1175 return __a / __b;
1178 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1179 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1181 return __a / __b;
1184 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1185 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1187 return __a / __b;
1190 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1191 vmul_s8 (int8x8_t __a, int8x8_t __b)
1193 return __a * __b;
1196 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1197 vmul_s16 (int16x4_t __a, int16x4_t __b)
1199 return __a * __b;
1202 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1203 vmul_s32 (int32x2_t __a, int32x2_t __b)
1205 return __a * __b;
1208 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1209 vmul_f32 (float32x2_t __a, float32x2_t __b)
1211 return __a * __b;
1214 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1215 vmul_f64 (float64x1_t __a, float64x1_t __b)
1217 return __a * __b;
1220 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1221 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1223 return __a * __b;
1226 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1227 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1229 return __a * __b;
1232 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1233 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1235 return __a * __b;
1238 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1239 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1241 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1242 (int8x8_t) __b);
1245 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1246 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1248 return __a * __b;
1251 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1252 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1254 return __a * __b;
1257 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1258 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1260 return __a * __b;
1263 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1264 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1266 return __a * __b;
1269 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1270 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1272 return __a * __b;
1275 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1276 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1278 return __a * __b;
1281 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1282 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1284 return __a * __b;
1287 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1288 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1290 return __a * __b;
1293 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1294 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1296 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1297 (int8x16_t) __b);
1300 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1301 vand_s8 (int8x8_t __a, int8x8_t __b)
1303 return __a & __b;
1306 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1307 vand_s16 (int16x4_t __a, int16x4_t __b)
1309 return __a & __b;
1312 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1313 vand_s32 (int32x2_t __a, int32x2_t __b)
1315 return __a & __b;
1318 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1319 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1321 return __a & __b;
1324 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1325 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1327 return __a & __b;
1330 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1331 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1333 return __a & __b;
1336 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1337 vand_s64 (int64x1_t __a, int64x1_t __b)
1339 return __a & __b;
1342 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1343 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1345 return __a & __b;
1348 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1349 vandq_s8 (int8x16_t __a, int8x16_t __b)
1351 return __a & __b;
1354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1355 vandq_s16 (int16x8_t __a, int16x8_t __b)
1357 return __a & __b;
1360 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1361 vandq_s32 (int32x4_t __a, int32x4_t __b)
1363 return __a & __b;
1366 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1367 vandq_s64 (int64x2_t __a, int64x2_t __b)
1369 return __a & __b;
1372 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1373 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1375 return __a & __b;
1378 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1379 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1381 return __a & __b;
1384 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1385 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1387 return __a & __b;
1390 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1391 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1393 return __a & __b;
1396 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1397 vorr_s8 (int8x8_t __a, int8x8_t __b)
1399 return __a | __b;
1402 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1403 vorr_s16 (int16x4_t __a, int16x4_t __b)
1405 return __a | __b;
1408 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1409 vorr_s32 (int32x2_t __a, int32x2_t __b)
1411 return __a | __b;
1414 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1415 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1417 return __a | __b;
1420 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1421 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1423 return __a | __b;
1426 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1427 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1429 return __a | __b;
1432 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1433 vorr_s64 (int64x1_t __a, int64x1_t __b)
1435 return __a | __b;
1438 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1439 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1441 return __a | __b;
1444 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1445 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1447 return __a | __b;
1450 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1451 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1453 return __a | __b;
1456 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1457 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1459 return __a | __b;
1462 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1463 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1465 return __a | __b;
1468 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1469 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1471 return __a | __b;
1474 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1475 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1477 return __a | __b;
1480 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1481 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1483 return __a | __b;
1486 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1487 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1489 return __a | __b;
1492 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1493 veor_s8 (int8x8_t __a, int8x8_t __b)
1495 return __a ^ __b;
1498 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1499 veor_s16 (int16x4_t __a, int16x4_t __b)
1501 return __a ^ __b;
1504 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1505 veor_s32 (int32x2_t __a, int32x2_t __b)
1507 return __a ^ __b;
1510 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1511 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1513 return __a ^ __b;
1516 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1517 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1519 return __a ^ __b;
1522 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1523 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1525 return __a ^ __b;
1528 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1529 veor_s64 (int64x1_t __a, int64x1_t __b)
1531 return __a ^ __b;
1534 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1535 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1537 return __a ^ __b;
1540 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1541 veorq_s8 (int8x16_t __a, int8x16_t __b)
1543 return __a ^ __b;
1546 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1547 veorq_s16 (int16x8_t __a, int16x8_t __b)
1549 return __a ^ __b;
1552 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1553 veorq_s32 (int32x4_t __a, int32x4_t __b)
1555 return __a ^ __b;
1558 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1559 veorq_s64 (int64x2_t __a, int64x2_t __b)
1561 return __a ^ __b;
1564 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1565 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1567 return __a ^ __b;
1570 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1571 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1573 return __a ^ __b;
1576 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1577 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1579 return __a ^ __b;
1582 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1583 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1585 return __a ^ __b;
1588 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1589 vbic_s8 (int8x8_t __a, int8x8_t __b)
1591 return __a & ~__b;
1594 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1595 vbic_s16 (int16x4_t __a, int16x4_t __b)
1597 return __a & ~__b;
1600 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1601 vbic_s32 (int32x2_t __a, int32x2_t __b)
1603 return __a & ~__b;
1606 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1607 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1609 return __a & ~__b;
1612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1613 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1615 return __a & ~__b;
1618 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1619 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1621 return __a & ~__b;
1624 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1625 vbic_s64 (int64x1_t __a, int64x1_t __b)
1627 return __a & ~__b;
1630 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1631 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1633 return __a & ~__b;
1636 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1637 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1639 return __a & ~__b;
1642 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1643 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1645 return __a & ~__b;
1648 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1649 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1651 return __a & ~__b;
1654 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1655 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1657 return __a & ~__b;
1660 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1661 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1663 return __a & ~__b;
1666 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1667 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1669 return __a & ~__b;
1672 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1673 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1675 return __a & ~__b;
1678 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1679 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1681 return __a & ~__b;
1684 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1685 vorn_s8 (int8x8_t __a, int8x8_t __b)
1687 return __a | ~__b;
1690 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1691 vorn_s16 (int16x4_t __a, int16x4_t __b)
1693 return __a | ~__b;
1696 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1697 vorn_s32 (int32x2_t __a, int32x2_t __b)
1699 return __a | ~__b;
1702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1703 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1705 return __a | ~__b;
1708 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1709 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1711 return __a | ~__b;
1714 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1715 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1717 return __a | ~__b;
1720 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1721 vorn_s64 (int64x1_t __a, int64x1_t __b)
1723 return __a | ~__b;
1726 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1727 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1729 return __a | ~__b;
1732 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1733 vornq_s8 (int8x16_t __a, int8x16_t __b)
1735 return __a | ~__b;
1738 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1739 vornq_s16 (int16x8_t __a, int16x8_t __b)
1741 return __a | ~__b;
1744 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1745 vornq_s32 (int32x4_t __a, int32x4_t __b)
1747 return __a | ~__b;
1750 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1751 vornq_s64 (int64x2_t __a, int64x2_t __b)
1753 return __a | ~__b;
1756 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1757 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1759 return __a | ~__b;
1762 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1763 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1765 return __a | ~__b;
1768 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1769 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1771 return __a | ~__b;
1774 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1775 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1777 return __a | ~__b;
1780 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1781 vsub_s8 (int8x8_t __a, int8x8_t __b)
1783 return __a - __b;
1786 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1787 vsub_s16 (int16x4_t __a, int16x4_t __b)
1789 return __a - __b;
1792 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1793 vsub_s32 (int32x2_t __a, int32x2_t __b)
1795 return __a - __b;
1798 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1799 vsub_f32 (float32x2_t __a, float32x2_t __b)
1801 return __a - __b;
1804 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1805 vsub_f64 (float64x1_t __a, float64x1_t __b)
1807 return __a - __b;
1810 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1811 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1813 return __a - __b;
1816 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1817 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1819 return __a - __b;
1822 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1823 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1825 return __a - __b;
1828 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1829 vsub_s64 (int64x1_t __a, int64x1_t __b)
1831 return __a - __b;
1834 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1835 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1837 return __a - __b;
1840 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1841 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1843 return __a - __b;
1846 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1847 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1849 return __a - __b;
1852 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1853 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1855 return __a - __b;
1858 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1859 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1861 return __a - __b;
1864 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1865 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1867 return __a - __b;
1870 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1871 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1873 return __a - __b;
1876 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1877 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1879 return __a - __b;
1882 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1883 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1885 return __a - __b;
1888 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1889 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1891 return __a - __b;
1894 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1895 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1897 return __a - __b;
1900 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1901 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1903 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1906 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1907 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1909 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1912 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1913 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1915 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1918 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1919 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1921 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1922 (int8x8_t) __b);
1925 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1926 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1928 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1929 (int16x4_t) __b);
1932 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1933 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1935 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1936 (int32x2_t) __b);
1939 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1940 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1942 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1946 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1948 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1951 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1952 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1954 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1957 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1958 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1960 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1961 (int8x16_t) __b);
1964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1965 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1967 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1968 (int16x8_t) __b);
1971 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1972 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1974 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1975 (int32x4_t) __b);
1978 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1979 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1981 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
1984 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1985 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1987 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
1990 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1991 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1993 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
1996 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1997 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1999 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2000 (int8x8_t) __b);
2003 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2004 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2006 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2007 (int16x4_t) __b);
2010 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2011 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2013 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2014 (int32x2_t) __b);
2017 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2018 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2020 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2023 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2024 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2026 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2029 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2030 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2032 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2036 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2038 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2039 (int8x16_t) __b);
2042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2043 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2045 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2046 (int16x8_t) __b);
2049 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2050 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2052 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2053 (int32x4_t) __b);
2056 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2057 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2059 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2062 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2063 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2065 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2068 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2069 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2071 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2074 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2075 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2077 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2080 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2081 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2083 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2086 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2087 vhsub_s8 (int8x8_t __a, int8x8_t __b)
2089 return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b);
2092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2093 vhsub_s16 (int16x4_t __a, int16x4_t __b)
2095 return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
2098 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2099 vhsub_s32 (int32x2_t __a, int32x2_t __b)
2101 return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
2104 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2105 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
2107 return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
2108 (int8x8_t) __b);
2111 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2112 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
2114 return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
2115 (int16x4_t) __b);
2118 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2119 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
2121 return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
2122 (int32x2_t) __b);
2125 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2126 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
2128 return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
2131 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2132 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
2134 return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
2137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2138 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
2140 return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
2143 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2144 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2146 return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
2147 (int8x16_t) __b);
2150 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2151 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2153 return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
2154 (int16x8_t) __b);
2157 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2158 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2160 return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
2161 (int32x4_t) __b);
2164 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2165 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
2167 return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
2170 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2171 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
2173 return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
2176 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2177 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
2179 return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
2182 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2183 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2185 return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
2186 (int16x8_t) __b);
2189 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2190 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2192 return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
2193 (int32x4_t) __b);
2196 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2197 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2199 return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
2200 (int64x2_t) __b);
2203 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2204 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
2206 return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
2209 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2210 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
2212 return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
2215 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2216 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
2218 return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
2221 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2222 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2224 return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
2225 (int16x8_t) __b);
2228 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2229 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2231 return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
2232 (int32x4_t) __b);
2235 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2236 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2238 return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
2239 (int64x2_t) __b);
2242 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2243 vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2245 return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
2248 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2249 vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2251 return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
2254 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2255 vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2257 return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
2260 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2261 vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2263 return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
2264 (int16x8_t) __b,
2265 (int16x8_t) __c);
2268 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2269 vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2271 return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
2272 (int32x4_t) __b,
2273 (int32x4_t) __c);
2276 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2277 vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2279 return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
2280 (int64x2_t) __b,
2281 (int64x2_t) __c);
2284 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2285 vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2287 return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
2290 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2291 vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2293 return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);;
2296 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2297 vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2299 return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
2302 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2303 vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2305 return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
2306 (int16x8_t) __b,
2307 (int16x8_t) __c);
2310 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2311 vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2313 return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
2314 (int32x4_t) __b,
2315 (int32x4_t) __c);
2318 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2319 vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2321 return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
2322 (int64x2_t) __b,
2323 (int64x2_t) __c);
2326 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2327 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2329 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2332 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2333 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2335 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2338 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2339 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2341 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2344 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2345 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2347 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2350 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2351 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2353 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2356 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2357 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2359 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2362 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2363 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2365 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2368 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2369 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2371 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2374 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2375 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2377 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2380 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2381 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2383 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2386 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2387 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2389 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2392 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2393 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2395 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2398 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2399 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2401 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2404 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2405 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2407 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2410 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2411 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2413 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2416 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2417 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2419 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2422 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2423 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2425 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2428 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2429 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2431 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2434 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2435 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2437 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2440 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2441 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2443 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2446 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2447 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2449 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2452 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2453 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2455 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2458 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2459 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2461 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2464 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2465 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2467 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2470 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2471 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2473 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2476 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2477 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2479 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2482 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2483 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2485 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2488 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2489 vqneg_s8 (int8x8_t __a)
2491 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2495 vqneg_s16 (int16x4_t __a)
2497 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2500 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2501 vqneg_s32 (int32x2_t __a)
2503 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2506 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2507 vqneg_s64 (int64x1_t __a)
2509 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2512 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2513 vqnegq_s8 (int8x16_t __a)
2515 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2518 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2519 vqnegq_s16 (int16x8_t __a)
2521 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2524 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2525 vqnegq_s32 (int32x4_t __a)
2527 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2530 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2531 vqabs_s8 (int8x8_t __a)
2533 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2536 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2537 vqabs_s16 (int16x4_t __a)
2539 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2542 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2543 vqabs_s32 (int32x2_t __a)
2545 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2548 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2549 vqabs_s64 (int64x1_t __a)
2551 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2554 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2555 vqabsq_s8 (int8x16_t __a)
2557 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2560 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2561 vqabsq_s16 (int16x8_t __a)
2563 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2567 vqabsq_s32 (int32x4_t __a)
2569 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2572 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2573 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2575 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2578 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2579 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2581 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2584 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2585 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2587 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2591 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2593 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2596 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2597 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2599 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2602 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2603 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2605 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2608 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2609 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2611 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2614 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2615 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2617 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2620 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2621 vcreate_s8 (uint64_t __a)
2623 return (int8x8_t) __a;
2626 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2627 vcreate_s16 (uint64_t __a)
2629 return (int16x4_t) __a;
2632 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2633 vcreate_s32 (uint64_t __a)
2635 return (int32x2_t) __a;
2638 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2639 vcreate_s64 (uint64_t __a)
2641 return (int64x1_t) {__a};
2644 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2645 vcreate_f32 (uint64_t __a)
2647 return (float32x2_t) __a;
2650 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2651 vcreate_u8 (uint64_t __a)
2653 return (uint8x8_t) __a;
2656 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2657 vcreate_u16 (uint64_t __a)
2659 return (uint16x4_t) __a;
2662 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2663 vcreate_u32 (uint64_t __a)
2665 return (uint32x2_t) __a;
2668 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2669 vcreate_u64 (uint64_t __a)
2671 return (uint64x1_t) {__a};
2674 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2675 vcreate_f64 (uint64_t __a)
2677 return (float64x1_t) __a;
2680 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2681 vcreate_p8 (uint64_t __a)
2683 return (poly8x8_t) __a;
2686 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2687 vcreate_p16 (uint64_t __a)
2689 return (poly16x4_t) __a;
2692 /* vget_lane */
2694 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2695 vget_lane_f32 (float32x2_t __a, const int __b)
2697 return __aarch64_vget_lane_any (__a, __b);
2700 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2701 vget_lane_f64 (float64x1_t __a, const int __b)
2703 return __aarch64_vget_lane_any (__a, __b);
2706 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2707 vget_lane_p8 (poly8x8_t __a, const int __b)
2709 return __aarch64_vget_lane_any (__a, __b);
2712 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2713 vget_lane_p16 (poly16x4_t __a, const int __b)
2715 return __aarch64_vget_lane_any (__a, __b);
2718 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2719 vget_lane_s8 (int8x8_t __a, const int __b)
2721 return __aarch64_vget_lane_any (__a, __b);
2724 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2725 vget_lane_s16 (int16x4_t __a, const int __b)
2727 return __aarch64_vget_lane_any (__a, __b);
2730 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2731 vget_lane_s32 (int32x2_t __a, const int __b)
2733 return __aarch64_vget_lane_any (__a, __b);
2736 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2737 vget_lane_s64 (int64x1_t __a, const int __b)
2739 return __aarch64_vget_lane_any (__a, __b);
2742 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2743 vget_lane_u8 (uint8x8_t __a, const int __b)
2745 return __aarch64_vget_lane_any (__a, __b);
2748 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2749 vget_lane_u16 (uint16x4_t __a, const int __b)
2751 return __aarch64_vget_lane_any (__a, __b);
2754 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2755 vget_lane_u32 (uint32x2_t __a, const int __b)
2757 return __aarch64_vget_lane_any (__a, __b);
2760 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2761 vget_lane_u64 (uint64x1_t __a, const int __b)
2763 return __aarch64_vget_lane_any (__a, __b);
2766 /* vgetq_lane */
2768 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2769 vgetq_lane_f32 (float32x4_t __a, const int __b)
2771 return __aarch64_vget_lane_any (__a, __b);
2774 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2775 vgetq_lane_f64 (float64x2_t __a, const int __b)
2777 return __aarch64_vget_lane_any (__a, __b);
2780 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2781 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2783 return __aarch64_vget_lane_any (__a, __b);
2786 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2787 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2789 return __aarch64_vget_lane_any (__a, __b);
2792 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2793 vgetq_lane_s8 (int8x16_t __a, const int __b)
2795 return __aarch64_vget_lane_any (__a, __b);
2798 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2799 vgetq_lane_s16 (int16x8_t __a, const int __b)
2801 return __aarch64_vget_lane_any (__a, __b);
2804 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2805 vgetq_lane_s32 (int32x4_t __a, const int __b)
2807 return __aarch64_vget_lane_any (__a, __b);
2810 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2811 vgetq_lane_s64 (int64x2_t __a, const int __b)
2813 return __aarch64_vget_lane_any (__a, __b);
2816 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2817 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2819 return __aarch64_vget_lane_any (__a, __b);
2822 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2823 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2825 return __aarch64_vget_lane_any (__a, __b);
2828 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2829 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2831 return __aarch64_vget_lane_any (__a, __b);
2834 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2835 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2837 return __aarch64_vget_lane_any (__a, __b);
2840 /* vreinterpret */
2842 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2843 vreinterpret_p8_f64 (float64x1_t __a)
2845 return (poly8x8_t) __a;
2848 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2849 vreinterpret_p8_s8 (int8x8_t __a)
2851 return (poly8x8_t) __a;
2854 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2855 vreinterpret_p8_s16 (int16x4_t __a)
2857 return (poly8x8_t) __a;
2860 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2861 vreinterpret_p8_s32 (int32x2_t __a)
2863 return (poly8x8_t) __a;
2866 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2867 vreinterpret_p8_s64 (int64x1_t __a)
2869 return (poly8x8_t) __a;
2872 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2873 vreinterpret_p8_f32 (float32x2_t __a)
2875 return (poly8x8_t) __a;
2878 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2879 vreinterpret_p8_u8 (uint8x8_t __a)
2881 return (poly8x8_t) __a;
2884 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2885 vreinterpret_p8_u16 (uint16x4_t __a)
2887 return (poly8x8_t) __a;
2890 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2891 vreinterpret_p8_u32 (uint32x2_t __a)
2893 return (poly8x8_t) __a;
2896 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2897 vreinterpret_p8_u64 (uint64x1_t __a)
2899 return (poly8x8_t) __a;
2902 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2903 vreinterpret_p8_p16 (poly16x4_t __a)
2905 return (poly8x8_t) __a;
2908 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2909 vreinterpretq_p8_f64 (float64x2_t __a)
2911 return (poly8x16_t) __a;
2914 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2915 vreinterpretq_p8_s8 (int8x16_t __a)
2917 return (poly8x16_t) __a;
2920 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2921 vreinterpretq_p8_s16 (int16x8_t __a)
2923 return (poly8x16_t) __a;
2926 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2927 vreinterpretq_p8_s32 (int32x4_t __a)
2929 return (poly8x16_t) __a;
2932 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2933 vreinterpretq_p8_s64 (int64x2_t __a)
2935 return (poly8x16_t) __a;
2938 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2939 vreinterpretq_p8_f32 (float32x4_t __a)
2941 return (poly8x16_t) __a;
2944 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2945 vreinterpretq_p8_u8 (uint8x16_t __a)
2947 return (poly8x16_t) __a;
2950 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2951 vreinterpretq_p8_u16 (uint16x8_t __a)
2953 return (poly8x16_t) __a;
2956 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2957 vreinterpretq_p8_u32 (uint32x4_t __a)
2959 return (poly8x16_t) __a;
2962 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2963 vreinterpretq_p8_u64 (uint64x2_t __a)
2965 return (poly8x16_t) __a;
2968 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2969 vreinterpretq_p8_p16 (poly16x8_t __a)
2971 return (poly8x16_t) __a;
2974 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2975 vreinterpret_p16_f64 (float64x1_t __a)
2977 return (poly16x4_t) __a;
2980 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2981 vreinterpret_p16_s8 (int8x8_t __a)
2983 return (poly16x4_t) __a;
2986 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2987 vreinterpret_p16_s16 (int16x4_t __a)
2989 return (poly16x4_t) __a;
2992 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2993 vreinterpret_p16_s32 (int32x2_t __a)
2995 return (poly16x4_t) __a;
2998 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2999 vreinterpret_p16_s64 (int64x1_t __a)
3001 return (poly16x4_t) __a;
3004 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3005 vreinterpret_p16_f32 (float32x2_t __a)
3007 return (poly16x4_t) __a;
3010 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3011 vreinterpret_p16_u8 (uint8x8_t __a)
3013 return (poly16x4_t) __a;
3016 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3017 vreinterpret_p16_u16 (uint16x4_t __a)
3019 return (poly16x4_t) __a;
3022 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3023 vreinterpret_p16_u32 (uint32x2_t __a)
3025 return (poly16x4_t) __a;
3028 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3029 vreinterpret_p16_u64 (uint64x1_t __a)
3031 return (poly16x4_t) __a;
3034 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3035 vreinterpret_p16_p8 (poly8x8_t __a)
3037 return (poly16x4_t) __a;
3040 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3041 vreinterpretq_p16_f64 (float64x2_t __a)
3043 return (poly16x8_t) __a;
3046 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3047 vreinterpretq_p16_s8 (int8x16_t __a)
3049 return (poly16x8_t) __a;
3052 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3053 vreinterpretq_p16_s16 (int16x8_t __a)
3055 return (poly16x8_t) __a;
3058 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3059 vreinterpretq_p16_s32 (int32x4_t __a)
3061 return (poly16x8_t) __a;
3064 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3065 vreinterpretq_p16_s64 (int64x2_t __a)
3067 return (poly16x8_t) __a;
3070 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3071 vreinterpretq_p16_f32 (float32x4_t __a)
3073 return (poly16x8_t) __a;
3076 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3077 vreinterpretq_p16_u8 (uint8x16_t __a)
3079 return (poly16x8_t) __a;
3082 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3083 vreinterpretq_p16_u16 (uint16x8_t __a)
3085 return (poly16x8_t) __a;
3088 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3089 vreinterpretq_p16_u32 (uint32x4_t __a)
3091 return (poly16x8_t) __a;
3094 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3095 vreinterpretq_p16_u64 (uint64x2_t __a)
3097 return (poly16x8_t) __a;
3100 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3101 vreinterpretq_p16_p8 (poly8x16_t __a)
3103 return (poly16x8_t) __a;
3106 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3107 vreinterpret_f32_f64 (float64x1_t __a)
3109 return (float32x2_t) __a;
3112 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3113 vreinterpret_f32_s8 (int8x8_t __a)
3115 return (float32x2_t) __a;
3118 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3119 vreinterpret_f32_s16 (int16x4_t __a)
3121 return (float32x2_t) __a;
3124 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3125 vreinterpret_f32_s32 (int32x2_t __a)
3127 return (float32x2_t) __a;
3130 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3131 vreinterpret_f32_s64 (int64x1_t __a)
3133 return (float32x2_t) __a;
3136 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3137 vreinterpret_f32_u8 (uint8x8_t __a)
3139 return (float32x2_t) __a;
3142 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3143 vreinterpret_f32_u16 (uint16x4_t __a)
3145 return (float32x2_t) __a;
3148 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3149 vreinterpret_f32_u32 (uint32x2_t __a)
3151 return (float32x2_t) __a;
3154 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3155 vreinterpret_f32_u64 (uint64x1_t __a)
3157 return (float32x2_t) __a;
3160 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3161 vreinterpret_f32_p8 (poly8x8_t __a)
3163 return (float32x2_t) __a;
3166 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3167 vreinterpret_f32_p16 (poly16x4_t __a)
3169 return (float32x2_t) __a;
3172 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3173 vreinterpretq_f32_f64 (float64x2_t __a)
3175 return (float32x4_t) __a;
3178 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3179 vreinterpretq_f32_s8 (int8x16_t __a)
3181 return (float32x4_t) __a;
3184 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3185 vreinterpretq_f32_s16 (int16x8_t __a)
3187 return (float32x4_t) __a;
3190 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3191 vreinterpretq_f32_s32 (int32x4_t __a)
3193 return (float32x4_t) __a;
3196 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3197 vreinterpretq_f32_s64 (int64x2_t __a)
3199 return (float32x4_t) __a;
3202 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3203 vreinterpretq_f32_u8 (uint8x16_t __a)
3205 return (float32x4_t) __a;
3208 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3209 vreinterpretq_f32_u16 (uint16x8_t __a)
3211 return (float32x4_t) __a;
3214 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3215 vreinterpretq_f32_u32 (uint32x4_t __a)
3217 return (float32x4_t) __a;
3220 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3221 vreinterpretq_f32_u64 (uint64x2_t __a)
3223 return (float32x4_t) __a;
3226 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3227 vreinterpretq_f32_p8 (poly8x16_t __a)
3229 return (float32x4_t) __a;
3232 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3233 vreinterpretq_f32_p16 (poly16x8_t __a)
3235 return (float32x4_t) __a;
3238 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3239 vreinterpret_f64_f32 (float32x2_t __a)
3241 return (float64x1_t) __a;
3244 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3245 vreinterpret_f64_p8 (poly8x8_t __a)
3247 return (float64x1_t) __a;
3250 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3251 vreinterpret_f64_p16 (poly16x4_t __a)
3253 return (float64x1_t) __a;
3256 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3257 vreinterpret_f64_s8 (int8x8_t __a)
3259 return (float64x1_t) __a;
3262 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3263 vreinterpret_f64_s16 (int16x4_t __a)
3265 return (float64x1_t) __a;
3268 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3269 vreinterpret_f64_s32 (int32x2_t __a)
3271 return (float64x1_t) __a;
3274 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3275 vreinterpret_f64_s64 (int64x1_t __a)
3277 return (float64x1_t) __a;
3280 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3281 vreinterpret_f64_u8 (uint8x8_t __a)
3283 return (float64x1_t) __a;
3286 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3287 vreinterpret_f64_u16 (uint16x4_t __a)
3289 return (float64x1_t) __a;
3292 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3293 vreinterpret_f64_u32 (uint32x2_t __a)
3295 return (float64x1_t) __a;
3298 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3299 vreinterpret_f64_u64 (uint64x1_t __a)
3301 return (float64x1_t) __a;
3304 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3305 vreinterpretq_f64_f32 (float32x4_t __a)
3307 return (float64x2_t) __a;
3310 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3311 vreinterpretq_f64_p8 (poly8x16_t __a)
3313 return (float64x2_t) __a;
3316 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3317 vreinterpretq_f64_p16 (poly16x8_t __a)
3319 return (float64x2_t) __a;
3322 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3323 vreinterpretq_f64_s8 (int8x16_t __a)
3325 return (float64x2_t) __a;
3328 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3329 vreinterpretq_f64_s16 (int16x8_t __a)
3331 return (float64x2_t) __a;
3334 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3335 vreinterpretq_f64_s32 (int32x4_t __a)
3337 return (float64x2_t) __a;
3340 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3341 vreinterpretq_f64_s64 (int64x2_t __a)
3343 return (float64x2_t) __a;
3346 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3347 vreinterpretq_f64_u8 (uint8x16_t __a)
3349 return (float64x2_t) __a;
3352 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3353 vreinterpretq_f64_u16 (uint16x8_t __a)
3355 return (float64x2_t) __a;
3358 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3359 vreinterpretq_f64_u32 (uint32x4_t __a)
3361 return (float64x2_t) __a;
3364 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3365 vreinterpretq_f64_u64 (uint64x2_t __a)
3367 return (float64x2_t) __a;
3370 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3371 vreinterpret_s64_f64 (float64x1_t __a)
3373 return (int64x1_t) __a;
3376 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3377 vreinterpret_s64_s8 (int8x8_t __a)
3379 return (int64x1_t) __a;
3382 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3383 vreinterpret_s64_s16 (int16x4_t __a)
3385 return (int64x1_t) __a;
3388 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3389 vreinterpret_s64_s32 (int32x2_t __a)
3391 return (int64x1_t) __a;
3394 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3395 vreinterpret_s64_f32 (float32x2_t __a)
3397 return (int64x1_t) __a;
3400 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3401 vreinterpret_s64_u8 (uint8x8_t __a)
3403 return (int64x1_t) __a;
3406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3407 vreinterpret_s64_u16 (uint16x4_t __a)
3409 return (int64x1_t) __a;
3412 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3413 vreinterpret_s64_u32 (uint32x2_t __a)
3415 return (int64x1_t) __a;
3418 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3419 vreinterpret_s64_u64 (uint64x1_t __a)
3421 return (int64x1_t) __a;
3424 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3425 vreinterpret_s64_p8 (poly8x8_t __a)
3427 return (int64x1_t) __a;
3430 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3431 vreinterpret_s64_p16 (poly16x4_t __a)
3433 return (int64x1_t) __a;
/* vreinterpretq_s64_*: bit-cast a 128-bit vector of any element type
   to int64x2_t (no value conversion, representation unchanged).  */
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f64 (float64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s8 (int8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s16 (int16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s32 (int32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __a;
}
/* vreinterpret_u64_*: bit-cast a 64-bit vector of any element type
   to uint64x1_t.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __a;
}
/* vreinterpretq_u64_*: bit-cast a 128-bit vector of any element type
   to uint64x2_t.  */
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f64 (float64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __a;
}
/* vreinterpret_s8_*: bit-cast a 64-bit vector of any element type
   to int8x8_t.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f64 (float64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s16 (int16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s32 (int32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_s64 (int64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_f32 (float32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u8 (uint8x8_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u16 (uint16x4_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u32 (uint32x2_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_u64 (uint64x1_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p8 (poly8x8_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vreinterpret_s8_p16 (poly16x4_t __a)
{
  return (int8x8_t) __a;
}
/* vreinterpretq_s8_*: bit-cast a 128-bit vector of any element type
   to int8x16_t.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f64 (float64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s16 (int16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s32 (int32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_s64 (int64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_f32 (float32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u8 (uint8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u16 (uint16x8_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u32 (uint32x4_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_u64 (uint64x2_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p8 (poly8x16_t __a)
{
  return (int8x16_t) __a;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vreinterpretq_s8_p16 (poly16x8_t __a)
{
  return (int8x16_t) __a;
}
/* vreinterpret_s16_*: bit-cast a 64-bit vector of any element type
   to int16x4_t.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f64 (float64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s8 (int8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s32 (int32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_s64 (int64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_f32 (float32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u8 (uint8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u16 (uint16x4_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u32 (uint32x2_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_u64 (uint64x1_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p8 (poly8x8_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vreinterpret_s16_p16 (poly16x4_t __a)
{
  return (int16x4_t) __a;
}
/* vreinterpretq_s16_*: bit-cast a 128-bit vector of any element type
   to int16x8_t.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f64 (float64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s8 (int8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s32 (int32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_s64 (int64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_f32 (float32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u8 (uint8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u16 (uint16x8_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u32 (uint32x4_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_u64 (uint64x2_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p8 (poly8x16_t __a)
{
  return (int16x8_t) __a;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vreinterpretq_s16_p16 (poly16x8_t __a)
{
  return (int16x8_t) __a;
}
/* vreinterpret_s32_*: bit-cast a 64-bit vector of any element type
   to int32x2_t.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f64 (float64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s8 (int8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s16 (int16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_s64 (int64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_f32 (float32x2_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u8 (uint8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u16 (uint16x4_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u32 (uint32x2_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_u64 (uint64x1_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p8 (poly8x8_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vreinterpret_s32_p16 (poly16x4_t __a)
{
  return (int32x2_t) __a;
}
/* vreinterpretq_s32_*: bit-cast a 128-bit vector of any element type
   to int32x4_t.  */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f64 (float64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s8 (int8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s16 (int16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_s64 (int64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_f32 (float32x4_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u8 (uint8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u16 (uint16x8_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u32 (uint32x4_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_u64 (uint64x2_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p8 (poly8x16_t __a)
{
  return (int32x4_t) __a;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vreinterpretq_s32_p16 (poly16x8_t __a)
{
  return (int32x4_t) __a;
}
/* vreinterpret_u8_*: bit-cast a 64-bit vector of any element type
   to uint8x8_t.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f64 (float64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s8 (int8x8_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s16 (int16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s32 (int32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_s64 (int64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_f32 (float32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u16 (uint16x4_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u32 (uint32x2_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_u64 (uint64x1_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p8 (poly8x8_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vreinterpret_u8_p16 (poly16x4_t __a)
{
  return (uint8x8_t) __a;
}
/* vreinterpretq_u8_*: bit-cast a 128-bit vector of any element type
   to uint8x16_t.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f64 (float64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s8 (int8x16_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s16 (int16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s32 (int32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_s64 (int64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_f32 (float32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u16 (uint16x8_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u32 (uint32x4_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_u64 (uint64x2_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p8 (poly8x16_t __a)
{
  return (uint8x16_t) __a;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vreinterpretq_u8_p16 (poly16x8_t __a)
{
  return (uint8x16_t) __a;
}
/* vreinterpret_u16_*: bit-cast a 64-bit vector of any element type
   to uint16x4_t.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f64 (float64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s8 (int8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s16 (int16x4_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s32 (int32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_s64 (int64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_f32 (float32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u8 (uint8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u32 (uint32x2_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_u64 (uint64x1_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p8 (poly8x8_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vreinterpret_u16_p16 (poly16x4_t __a)
{
  return (uint16x4_t) __a;
}
/* vreinterpretq_u16_*: bit-cast a 128-bit vector of any element type
   to uint16x8_t.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f64 (float64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s8 (int8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s16 (int16x8_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s32 (int32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_s64 (int64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_f32 (float32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u8 (uint8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u32 (uint32x4_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_u64 (uint64x2_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p8 (poly8x16_t __a)
{
  return (uint16x8_t) __a;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vreinterpretq_u16_p16 (poly16x8_t __a)
{
  return (uint16x8_t) __a;
}
/* vreinterpret_u32_*: bit-cast a 64-bit vector of any element type
   to uint32x2_t.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f64 (float64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s8 (int8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s16 (int16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s32 (int32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_s64 (int64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_f32 (float32x2_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u8 (uint8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u16 (uint16x4_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_u64 (uint64x1_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p8 (poly8x8_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vreinterpret_u32_p16 (poly16x4_t __a)
{
  return (uint32x2_t) __a;
}
/* vreinterpretq_u32_*: bit-cast a 128-bit vector of any element type
   to uint32x4_t.  */
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f64 (float64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s8 (int8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s16 (int16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s32 (int32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_s64 (int64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_f32 (float32x4_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u8 (uint8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u16 (uint16x8_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_u64 (uint64x2_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p8 (poly8x16_t __a)
{
  return (uint32x4_t) __a;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vreinterpretq_u32_p16 (poly16x8_t __a)
{
  return (uint32x4_t) __a;
}
/* vset_lane_<type>: return a copy of the 64-bit vector __vec with lane
   __index replaced by the scalar __elem.  All variants forward to the
   __aarch64_vset_lane_any helper, which is defined earlier in this
   file (outside this excerpt); __index is expected to be a constant
   lane number.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}
/* vsetq_lane_<type>: 128-bit counterpart of vset_lane — return a copy
   of __vec with lane __index replaced by __elem, via the same
   __aarch64_vset_lane_any helper defined earlier in this file.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
{
  return __aarch64_vset_lane_any (__elem, __vec, __index);
}
/* vget_low_<type>: return the low half (uint64_t lane 0) of a 128-bit
   vector as a 64-bit vector.

   __GET_LOW views the argument as two uint64_t lanes, copies lane 0
   into a fresh 64-bit vector with vcreate_u64, and reinterprets it
   back to the requested element type.  The macro is expanded once per
   variant and #undef'd at the end of the group.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_low_f32 (float32x4_t __a)
{
  __GET_LOW (f32);
}

/* f64 is special-cased: the one-element result is built directly from
   lane 0 instead of going through the u64 round-trip.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_low_f64 (float64x2_t __a)
{
  return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_low_p8 (poly8x16_t __a)
{
  __GET_LOW (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_low_p16 (poly16x8_t __a)
{
  __GET_LOW (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_low_s8 (int8x16_t __a)
{
  __GET_LOW (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_low_s16 (int16x8_t __a)
{
  __GET_LOW (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_low_s32 (int32x4_t __a)
{
  __GET_LOW (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_low_s64 (int64x2_t __a)
{
  __GET_LOW (s64);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_low_u8 (uint8x16_t __a)
{
  __GET_LOW (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_low_u16 (uint16x8_t __a)
{
  __GET_LOW (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_low_u32 (uint32x4_t __a)
{
  __GET_LOW (u32);
}

/* u64 needs no reinterpret: extract lane 0 directly.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_low_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 0));
}

#undef __GET_LOW
/* vget_high_<type>: return the high half (uint64_t lane 1) of a
   128-bit vector as a 64-bit vector.  Same u64 round-trip scheme as
   __GET_LOW above, but extracting lane 1; the macro is #undef'd after
   the group (vget_high_u64, which needs no reinterpret, follows the
   #undef).  */
#define __GET_HIGH(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));  \
  return vreinterpret_##__TYPE##_u64 (hi);

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vget_high_f32 (float32x4_t __a)
{
  __GET_HIGH (f32);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vget_high_f64 (float64x2_t __a)
{
  __GET_HIGH (f64);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vget_high_p8 (poly8x16_t __a)
{
  __GET_HIGH (p8);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vget_high_p16 (poly16x8_t __a)
{
  __GET_HIGH (p16);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vget_high_s8 (int8x16_t __a)
{
  __GET_HIGH (s8);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vget_high_s16 (int16x8_t __a)
{
  __GET_HIGH (s16);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vget_high_s32 (int32x4_t __a)
{
  __GET_HIGH (s32);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vget_high_s64 (int64x2_t __a)
{
  __GET_HIGH (s64);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vget_high_u8 (uint8x16_t __a)
{
  __GET_HIGH (u8);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vget_high_u16 (uint16x8_t __a)
{
  __GET_HIGH (u16);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vget_high_u32 (uint32x4_t __a)
{
  __GET_HIGH (u32);
}

#undef __GET_HIGH

/* u64 needs no reinterpret: extract lane 1 directly.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vget_high_u64 (uint64x2_t __a)
{
  return vcreate_u64 (vgetq_lane_u64 (__a, 1));
}
4732 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4733 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4735 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4738 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4739 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4741 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4744 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4745 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4747 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4750 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4751 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4753 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4756 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4757 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4759 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4762 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4763 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4765 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4766 (int8x8_t) __b);
4769 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4770 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4772 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4773 (int16x4_t) __b);
4776 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4777 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4779 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4780 (int32x2_t) __b);
4783 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4784 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4786 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4789 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4790 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4792 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4795 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4796 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4798 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4799 (int8x8_t) __b);
4802 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4803 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4805 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4806 (int16x4_t) __b);
4809 /* Start of temporary inline asm implementations. */
4811 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4812 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4814 int8x8_t result;
4815 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4816 : "=w"(result)
4817 : "0"(a), "w"(b), "w"(c)
4818 : /* No clobbers */);
4819 return result;
4822 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4823 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4825 int16x4_t result;
4826 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4827 : "=w"(result)
4828 : "0"(a), "w"(b), "w"(c)
4829 : /* No clobbers */);
4830 return result;
4833 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4834 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4836 int32x2_t result;
4837 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4838 : "=w"(result)
4839 : "0"(a), "w"(b), "w"(c)
4840 : /* No clobbers */);
4841 return result;
4844 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4845 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4847 uint8x8_t result;
4848 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4849 : "=w"(result)
4850 : "0"(a), "w"(b), "w"(c)
4851 : /* No clobbers */);
4852 return result;
4855 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4856 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4858 uint16x4_t result;
4859 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4860 : "=w"(result)
4861 : "0"(a), "w"(b), "w"(c)
4862 : /* No clobbers */);
4863 return result;
4866 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4867 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4869 uint32x2_t result;
4870 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4871 : "=w"(result)
4872 : "0"(a), "w"(b), "w"(c)
4873 : /* No clobbers */);
4874 return result;
4877 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4878 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4880 int16x8_t result;
4881 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4882 : "=w"(result)
4883 : "0"(a), "w"(b), "w"(c)
4884 : /* No clobbers */);
4885 return result;
4888 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4889 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4891 int32x4_t result;
4892 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4893 : "=w"(result)
4894 : "0"(a), "w"(b), "w"(c)
4895 : /* No clobbers */);
4896 return result;
4899 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4900 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4902 int64x2_t result;
4903 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4904 : "=w"(result)
4905 : "0"(a), "w"(b), "w"(c)
4906 : /* No clobbers */);
4907 return result;
4910 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4911 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4913 uint16x8_t result;
4914 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4915 : "=w"(result)
4916 : "0"(a), "w"(b), "w"(c)
4917 : /* No clobbers */);
4918 return result;
4921 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4922 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4924 uint32x4_t result;
4925 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4926 : "=w"(result)
4927 : "0"(a), "w"(b), "w"(c)
4928 : /* No clobbers */);
4929 return result;
4932 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4933 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4935 uint64x2_t result;
4936 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4937 : "=w"(result)
4938 : "0"(a), "w"(b), "w"(c)
4939 : /* No clobbers */);
4940 return result;
4943 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4944 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4946 int16x8_t result;
4947 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4948 : "=w"(result)
4949 : "0"(a), "w"(b), "w"(c)
4950 : /* No clobbers */);
4951 return result;
4954 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4955 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4957 int32x4_t result;
4958 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4959 : "=w"(result)
4960 : "0"(a), "w"(b), "w"(c)
4961 : /* No clobbers */);
4962 return result;
4965 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4966 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4968 int64x2_t result;
4969 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4970 : "=w"(result)
4971 : "0"(a), "w"(b), "w"(c)
4972 : /* No clobbers */);
4973 return result;
4976 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4977 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4979 uint16x8_t result;
4980 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4981 : "=w"(result)
4982 : "0"(a), "w"(b), "w"(c)
4983 : /* No clobbers */);
4984 return result;
4987 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4988 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4990 uint32x4_t result;
4991 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4992 : "=w"(result)
4993 : "0"(a), "w"(b), "w"(c)
4994 : /* No clobbers */);
4995 return result;
4998 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4999 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
5001 uint64x2_t result;
5002 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
5003 : "=w"(result)
5004 : "0"(a), "w"(b), "w"(c)
5005 : /* No clobbers */);
5006 return result;
5009 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5010 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
5012 int8x16_t result;
5013 __asm__ ("saba %0.16b,%2.16b,%3.16b"
5014 : "=w"(result)
5015 : "0"(a), "w"(b), "w"(c)
5016 : /* No clobbers */);
5017 return result;
5020 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5021 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
5023 int16x8_t result;
5024 __asm__ ("saba %0.8h,%2.8h,%3.8h"
5025 : "=w"(result)
5026 : "0"(a), "w"(b), "w"(c)
5027 : /* No clobbers */);
5028 return result;
5031 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5032 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
5034 int32x4_t result;
5035 __asm__ ("saba %0.4s,%2.4s,%3.4s"
5036 : "=w"(result)
5037 : "0"(a), "w"(b), "w"(c)
5038 : /* No clobbers */);
5039 return result;
5042 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5043 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5045 uint8x16_t result;
5046 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
5047 : "=w"(result)
5048 : "0"(a), "w"(b), "w"(c)
5049 : /* No clobbers */);
5050 return result;
5053 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5054 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5056 uint16x8_t result;
5057 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
5058 : "=w"(result)
5059 : "0"(a), "w"(b), "w"(c)
5060 : /* No clobbers */);
5061 return result;
5064 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5065 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5067 uint32x4_t result;
5068 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
5069 : "=w"(result)
5070 : "0"(a), "w"(b), "w"(c)
5071 : /* No clobbers */);
5072 return result;
5075 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5076 vabd_f32 (float32x2_t a, float32x2_t b)
5078 float32x2_t result;
5079 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
5080 : "=w"(result)
5081 : "w"(a), "w"(b)
5082 : /* No clobbers */);
5083 return result;
5086 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5087 vabd_s8 (int8x8_t a, int8x8_t b)
5089 int8x8_t result;
5090 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
5091 : "=w"(result)
5092 : "w"(a), "w"(b)
5093 : /* No clobbers */);
5094 return result;
5097 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5098 vabd_s16 (int16x4_t a, int16x4_t b)
5100 int16x4_t result;
5101 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
5102 : "=w"(result)
5103 : "w"(a), "w"(b)
5104 : /* No clobbers */);
5105 return result;
5108 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5109 vabd_s32 (int32x2_t a, int32x2_t b)
5111 int32x2_t result;
5112 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
5113 : "=w"(result)
5114 : "w"(a), "w"(b)
5115 : /* No clobbers */);
5116 return result;
5119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5120 vabd_u8 (uint8x8_t a, uint8x8_t b)
5122 uint8x8_t result;
5123 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
5124 : "=w"(result)
5125 : "w"(a), "w"(b)
5126 : /* No clobbers */);
5127 return result;
5130 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5131 vabd_u16 (uint16x4_t a, uint16x4_t b)
5133 uint16x4_t result;
5134 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
5135 : "=w"(result)
5136 : "w"(a), "w"(b)
5137 : /* No clobbers */);
5138 return result;
5141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5142 vabd_u32 (uint32x2_t a, uint32x2_t b)
5144 uint32x2_t result;
5145 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
5146 : "=w"(result)
5147 : "w"(a), "w"(b)
5148 : /* No clobbers */);
5149 return result;
5152 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
5153 vabdd_f64 (float64_t a, float64_t b)
5155 float64_t result;
5156 __asm__ ("fabd %d0, %d1, %d2"
5157 : "=w"(result)
5158 : "w"(a), "w"(b)
5159 : /* No clobbers */);
5160 return result;
5163 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5164 vabdl_high_s8 (int8x16_t a, int8x16_t b)
5166 int16x8_t result;
5167 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
5168 : "=w"(result)
5169 : "w"(a), "w"(b)
5170 : /* No clobbers */);
5171 return result;
5174 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5175 vabdl_high_s16 (int16x8_t a, int16x8_t b)
5177 int32x4_t result;
5178 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
5179 : "=w"(result)
5180 : "w"(a), "w"(b)
5181 : /* No clobbers */);
5182 return result;
5185 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5186 vabdl_high_s32 (int32x4_t a, int32x4_t b)
5188 int64x2_t result;
5189 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
5190 : "=w"(result)
5191 : "w"(a), "w"(b)
5192 : /* No clobbers */);
5193 return result;
5196 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5197 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
5199 uint16x8_t result;
5200 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
5201 : "=w"(result)
5202 : "w"(a), "w"(b)
5203 : /* No clobbers */);
5204 return result;
5207 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5208 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
5210 uint32x4_t result;
5211 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
5212 : "=w"(result)
5213 : "w"(a), "w"(b)
5214 : /* No clobbers */);
5215 return result;
5218 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5219 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
5221 uint64x2_t result;
5222 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
5223 : "=w"(result)
5224 : "w"(a), "w"(b)
5225 : /* No clobbers */);
5226 return result;
5229 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5230 vabdl_s8 (int8x8_t a, int8x8_t b)
5232 int16x8_t result;
5233 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
5234 : "=w"(result)
5235 : "w"(a), "w"(b)
5236 : /* No clobbers */);
5237 return result;
5240 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5241 vabdl_s16 (int16x4_t a, int16x4_t b)
5243 int32x4_t result;
5244 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
5245 : "=w"(result)
5246 : "w"(a), "w"(b)
5247 : /* No clobbers */);
5248 return result;
5251 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5252 vabdl_s32 (int32x2_t a, int32x2_t b)
5254 int64x2_t result;
5255 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
5256 : "=w"(result)
5257 : "w"(a), "w"(b)
5258 : /* No clobbers */);
5259 return result;
5262 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5263 vabdl_u8 (uint8x8_t a, uint8x8_t b)
5265 uint16x8_t result;
5266 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
5267 : "=w"(result)
5268 : "w"(a), "w"(b)
5269 : /* No clobbers */);
5270 return result;
5273 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5274 vabdl_u16 (uint16x4_t a, uint16x4_t b)
5276 uint32x4_t result;
5277 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
5278 : "=w"(result)
5279 : "w"(a), "w"(b)
5280 : /* No clobbers */);
5281 return result;
5284 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5285 vabdl_u32 (uint32x2_t a, uint32x2_t b)
5287 uint64x2_t result;
5288 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
5289 : "=w"(result)
5290 : "w"(a), "w"(b)
5291 : /* No clobbers */);
5292 return result;
5295 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5296 vabdq_f32 (float32x4_t a, float32x4_t b)
5298 float32x4_t result;
5299 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
5300 : "=w"(result)
5301 : "w"(a), "w"(b)
5302 : /* No clobbers */);
5303 return result;
5306 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5307 vabdq_f64 (float64x2_t a, float64x2_t b)
5309 float64x2_t result;
5310 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
5311 : "=w"(result)
5312 : "w"(a), "w"(b)
5313 : /* No clobbers */);
5314 return result;
5317 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5318 vabdq_s8 (int8x16_t a, int8x16_t b)
5320 int8x16_t result;
5321 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
5322 : "=w"(result)
5323 : "w"(a), "w"(b)
5324 : /* No clobbers */);
5325 return result;
5328 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5329 vabdq_s16 (int16x8_t a, int16x8_t b)
5331 int16x8_t result;
5332 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
5333 : "=w"(result)
5334 : "w"(a), "w"(b)
5335 : /* No clobbers */);
5336 return result;
5339 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5340 vabdq_s32 (int32x4_t a, int32x4_t b)
5342 int32x4_t result;
5343 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
5344 : "=w"(result)
5345 : "w"(a), "w"(b)
5346 : /* No clobbers */);
5347 return result;
5350 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5351 vabdq_u8 (uint8x16_t a, uint8x16_t b)
5353 uint8x16_t result;
5354 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
5355 : "=w"(result)
5356 : "w"(a), "w"(b)
5357 : /* No clobbers */);
5358 return result;
5361 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5362 vabdq_u16 (uint16x8_t a, uint16x8_t b)
5364 uint16x8_t result;
5365 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
5366 : "=w"(result)
5367 : "w"(a), "w"(b)
5368 : /* No clobbers */);
5369 return result;
5372 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5373 vabdq_u32 (uint32x4_t a, uint32x4_t b)
5375 uint32x4_t result;
5376 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
5377 : "=w"(result)
5378 : "w"(a), "w"(b)
5379 : /* No clobbers */);
5380 return result;
5383 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5384 vabds_f32 (float32_t a, float32_t b)
5386 float32_t result;
5387 __asm__ ("fabd %s0, %s1, %s2"
5388 : "=w"(result)
5389 : "w"(a), "w"(b)
5390 : /* No clobbers */);
5391 return result;
5394 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5395 vaddlv_s8 (int8x8_t a)
5397 int16_t result;
5398 __asm__ ("saddlv %h0,%1.8b"
5399 : "=w"(result)
5400 : "w"(a)
5401 : /* No clobbers */);
5402 return result;
5405 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5406 vaddlv_s16 (int16x4_t a)
5408 int32_t result;
5409 __asm__ ("saddlv %s0,%1.4h"
5410 : "=w"(result)
5411 : "w"(a)
5412 : /* No clobbers */);
5413 return result;
5416 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5417 vaddlv_u8 (uint8x8_t a)
5419 uint16_t result;
5420 __asm__ ("uaddlv %h0,%1.8b"
5421 : "=w"(result)
5422 : "w"(a)
5423 : /* No clobbers */);
5424 return result;
5427 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5428 vaddlv_u16 (uint16x4_t a)
5430 uint32_t result;
5431 __asm__ ("uaddlv %s0,%1.4h"
5432 : "=w"(result)
5433 : "w"(a)
5434 : /* No clobbers */);
5435 return result;
5438 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5439 vaddlvq_s8 (int8x16_t a)
5441 int16_t result;
5442 __asm__ ("saddlv %h0,%1.16b"
5443 : "=w"(result)
5444 : "w"(a)
5445 : /* No clobbers */);
5446 return result;
5449 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5450 vaddlvq_s16 (int16x8_t a)
5452 int32_t result;
5453 __asm__ ("saddlv %s0,%1.8h"
5454 : "=w"(result)
5455 : "w"(a)
5456 : /* No clobbers */);
5457 return result;
5460 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5461 vaddlvq_s32 (int32x4_t a)
5463 int64_t result;
5464 __asm__ ("saddlv %d0,%1.4s"
5465 : "=w"(result)
5466 : "w"(a)
5467 : /* No clobbers */);
5468 return result;
5471 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5472 vaddlvq_u8 (uint8x16_t a)
5474 uint16_t result;
5475 __asm__ ("uaddlv %h0,%1.16b"
5476 : "=w"(result)
5477 : "w"(a)
5478 : /* No clobbers */);
5479 return result;
5482 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5483 vaddlvq_u16 (uint16x8_t a)
5485 uint32_t result;
5486 __asm__ ("uaddlv %s0,%1.8h"
5487 : "=w"(result)
5488 : "w"(a)
5489 : /* No clobbers */);
5490 return result;
5493 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5494 vaddlvq_u32 (uint32x4_t a)
5496 uint64_t result;
5497 __asm__ ("uaddlv %d0,%1.4s"
5498 : "=w"(result)
5499 : "w"(a)
5500 : /* No clobbers */);
5501 return result;
/* vcopyq_lane_<type> (a, b, c, d): insert lane d of vector c into
   lane b of vector a (INS instruction).  Implemented as macros because
   the lane numbers b and d must be compile-time immediates
   ("i" constraints).  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t c_ = (c);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t c_ = (c);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5660 /* vcvt_f16_f32 not supported */
5662 /* vcvt_f32_f16 not supported */
5664 /* vcvt_high_f16_f32 not supported */
5666 /* vcvt_high_f32_f16 not supported */
5668 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt_n_* (a, b): fixed-point <-> float conversion on 64-bit vectors
   with b fraction bits (SCVTF/UCVTF/FCVTZS/FCVTZU).  Macros because b
   must be a compile-time immediate.  */

#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t a_ = (a);                                             \
       float32x2_t result;                                              \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       int32x2_t result;                                                \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t a_ = (a);                                            \
       uint32x2_t result;                                               \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvtd_n_* (a, b): scalar double-precision fixed-point <-> float
   conversion with b fraction bits.  Macros because b must be a
   compile-time immediate.  */

#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvtq_n_* (a, b): fixed-point <-> float conversion on 128-bit
   vectors with b fraction bits.  Macros because b must be a
   compile-time immediate.  */

#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       float32x4_t result;                                              \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       float64x2_t result;                                              \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       int32x4_t result;                                                \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       int64x2_t result;                                                \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       uint32x4_t result;                                               \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t a_ = (a);                                            \
       uint64x2_t result;                                               \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvts_n_* (a, b): scalar single-precision fixed-point <-> float
   conversion with b fraction bits.  Macros because b must be a
   compile-time immediate.  */

#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       float32_t result;                                                \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       float32_t result;                                                \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       int32_t result;                                                  \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       uint32_t result;                                                 \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5910 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5911 vcvtx_f32_f64 (float64x2_t a)
5913 float32x2_t result;
5914 __asm__ ("fcvtxn %0.2s,%1.2d"
5915 : "=w"(result)
5916 : "w"(a)
5917 : /* No clobbers */);
5918 return result;
5921 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5922 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5924 float32x4_t result;
5925 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5926 : "=w"(result)
5927 : "w" (b), "0"(a)
5928 : /* No clobbers */);
5929 return result;
5932 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5933 vcvtxd_f32_f64 (float64_t a)
5935 float32_t result;
5936 __asm__ ("fcvtxn %s0,%d1"
5937 : "=w"(result)
5938 : "w"(a)
5939 : /* No clobbers */);
5940 return result;
5943 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5944 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5946 float32x2_t result;
5947 float32x2_t t1;
5948 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
5949 : "=w"(result), "=w"(t1)
5950 : "0"(a), "w"(b), "w"(c)
5951 : /* No clobbers */);
5952 return result;
5955 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5956 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
5958 int16x4_t result;
5959 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5960 : "=w"(result)
5961 : "0"(a), "w"(b), "x"(c)
5962 : /* No clobbers */);
5963 return result;
5966 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5967 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
5969 int32x2_t result;
5970 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5971 : "=w"(result)
5972 : "0"(a), "w"(b), "w"(c)
5973 : /* No clobbers */);
5974 return result;
5977 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5978 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
5980 uint16x4_t result;
5981 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5982 : "=w"(result)
5983 : "0"(a), "w"(b), "x"(c)
5984 : /* No clobbers */);
5985 return result;
5988 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5989 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
5991 uint32x2_t result;
5992 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5993 : "=w"(result)
5994 : "0"(a), "w"(b), "w"(c)
5995 : /* No clobbers */);
5996 return result;
5999 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6000 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6002 int8x8_t result;
6003 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6004 : "=w"(result)
6005 : "0"(a), "w"(b), "w"(c)
6006 : /* No clobbers */);
6007 return result;
6010 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6011 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6013 int16x4_t result;
6014 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6015 : "=w"(result)
6016 : "0"(a), "w"(b), "w"(c)
6017 : /* No clobbers */);
6018 return result;
6021 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6022 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6024 int32x2_t result;
6025 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6026 : "=w"(result)
6027 : "0"(a), "w"(b), "w"(c)
6028 : /* No clobbers */);
6029 return result;
6032 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6033 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6035 uint8x8_t result;
6036 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6037 : "=w"(result)
6038 : "0"(a), "w"(b), "w"(c)
6039 : /* No clobbers */);
6040 return result;
6043 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6044 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6046 uint16x4_t result;
6047 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6048 : "=w"(result)
6049 : "0"(a), "w"(b), "w"(c)
6050 : /* No clobbers */);
6051 return result;
6054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6055 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6057 uint32x2_t result;
6058 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6059 : "=w"(result)
6060 : "0"(a), "w"(b), "w"(c)
6061 : /* No clobbers */);
6062 return result;
/* Widening multiply-accumulate from the high half of B by lane D of C
   (SMLAL2/UMLAL2 by element).  Macros because the lane index must be a
   compile-time constant ("i").  The _laneq variants take a 128-bit C.  */
#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6178 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6180 int32x4_t result;
6181 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6182 : "=w"(result)
6183 : "0"(a), "w"(b), "x"(c)
6184 : /* No clobbers */);
6185 return result;
6188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6189 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6191 int64x2_t result;
6192 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6193 : "=w"(result)
6194 : "0"(a), "w"(b), "w"(c)
6195 : /* No clobbers */);
6196 return result;
6199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6200 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6202 uint32x4_t result;
6203 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6204 : "=w"(result)
6205 : "0"(a), "w"(b), "x"(c)
6206 : /* No clobbers */);
6207 return result;
6210 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6211 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6213 uint64x2_t result;
6214 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6215 : "=w"(result)
6216 : "0"(a), "w"(b), "w"(c)
6217 : /* No clobbers */);
6218 return result;
6221 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6222 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6224 int16x8_t result;
6225 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6226 : "=w"(result)
6227 : "0"(a), "w"(b), "w"(c)
6228 : /* No clobbers */);
6229 return result;
6232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6233 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6235 int32x4_t result;
6236 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6237 : "=w"(result)
6238 : "0"(a), "w"(b), "w"(c)
6239 : /* No clobbers */);
6240 return result;
6243 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6244 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6246 int64x2_t result;
6247 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6248 : "=w"(result)
6249 : "0"(a), "w"(b), "w"(c)
6250 : /* No clobbers */);
6251 return result;
6254 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6255 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6257 uint16x8_t result;
6258 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6259 : "=w"(result)
6260 : "0"(a), "w"(b), "w"(c)
6261 : /* No clobbers */);
6262 return result;
6265 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6266 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6268 uint32x4_t result;
6269 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6270 : "=w"(result)
6271 : "0"(a), "w"(b), "w"(c)
6272 : /* No clobbers */);
6273 return result;
6276 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6277 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6279 uint64x2_t result;
6280 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6281 : "=w"(result)
6282 : "0"(a), "w"(b), "w"(c)
6283 : /* No clobbers */);
6284 return result;
/* Widening multiply-accumulate by lane D of C (SMLAL/UMLAL by element).
   Macros because the lane index must be an immediate ("i").  */
#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6400 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
6402 int32x4_t result;
6403 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
6404 : "=w"(result)
6405 : "0"(a), "w"(b), "x"(c)
6406 : /* No clobbers */);
6407 return result;
6410 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6411 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
6413 int64x2_t result;
6414 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
6415 : "=w"(result)
6416 : "0"(a), "w"(b), "w"(c)
6417 : /* No clobbers */);
6418 return result;
6421 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6422 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
6424 uint32x4_t result;
6425 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
6426 : "=w"(result)
6427 : "0"(a), "w"(b), "x"(c)
6428 : /* No clobbers */);
6429 return result;
6432 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6433 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
6435 uint64x2_t result;
6436 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
6437 : "=w"(result)
6438 : "0"(a), "w"(b), "w"(c)
6439 : /* No clobbers */);
6440 return result;
6443 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6444 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
6446 int16x8_t result;
6447 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
6448 : "=w"(result)
6449 : "0"(a), "w"(b), "w"(c)
6450 : /* No clobbers */);
6451 return result;
6454 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6455 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
6457 int32x4_t result;
6458 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
6459 : "=w"(result)
6460 : "0"(a), "w"(b), "w"(c)
6461 : /* No clobbers */);
6462 return result;
6465 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6466 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
6468 int64x2_t result;
6469 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
6470 : "=w"(result)
6471 : "0"(a), "w"(b), "w"(c)
6472 : /* No clobbers */);
6473 return result;
6476 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6477 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
6479 uint16x8_t result;
6480 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
6481 : "=w"(result)
6482 : "0"(a), "w"(b), "w"(c)
6483 : /* No clobbers */);
6484 return result;
6487 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6488 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
6490 uint32x4_t result;
6491 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
6492 : "=w"(result)
6493 : "0"(a), "w"(b), "w"(c)
6494 : /* No clobbers */);
6495 return result;
6498 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6499 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
6501 uint64x2_t result;
6502 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
6503 : "=w"(result)
6504 : "0"(a), "w"(b), "w"(c)
6505 : /* No clobbers */);
6506 return result;
6509 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6510 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6512 float32x4_t result;
6513 float32x4_t t1;
6514 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
6515 : "=w"(result), "=w"(t1)
6516 : "0"(a), "w"(b), "w"(c)
6517 : /* No clobbers */);
6518 return result;
6521 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6522 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
6524 int16x8_t result;
6525 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6526 : "=w"(result)
6527 : "0"(a), "w"(b), "x"(c)
6528 : /* No clobbers */);
6529 return result;
6532 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6533 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
6535 int32x4_t result;
6536 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6537 : "=w"(result)
6538 : "0"(a), "w"(b), "w"(c)
6539 : /* No clobbers */);
6540 return result;
6543 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6544 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
6546 uint16x8_t result;
6547 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6548 : "=w"(result)
6549 : "0"(a), "w"(b), "x"(c)
6550 : /* No clobbers */);
6551 return result;
6554 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6555 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
6557 uint32x4_t result;
6558 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6559 : "=w"(result)
6560 : "0"(a), "w"(b), "w"(c)
6561 : /* No clobbers */);
6562 return result;
6565 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6566 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
6568 int8x16_t result;
6569 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6570 : "=w"(result)
6571 : "0"(a), "w"(b), "w"(c)
6572 : /* No clobbers */);
6573 return result;
6576 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6577 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
6579 int16x8_t result;
6580 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6581 : "=w"(result)
6582 : "0"(a), "w"(b), "w"(c)
6583 : /* No clobbers */);
6584 return result;
6587 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6588 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
6590 int32x4_t result;
6591 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6592 : "=w"(result)
6593 : "0"(a), "w"(b), "w"(c)
6594 : /* No clobbers */);
6595 return result;
6598 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6599 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
6601 uint8x16_t result;
6602 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6603 : "=w"(result)
6604 : "0"(a), "w"(b), "w"(c)
6605 : /* No clobbers */);
6606 return result;
6609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6610 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
6612 uint16x8_t result;
6613 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6614 : "=w"(result)
6615 : "0"(a), "w"(b), "w"(c)
6616 : /* No clobbers */);
6617 return result;
6620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6621 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
6623 uint32x4_t result;
6624 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6625 : "=w"(result)
6626 : "0"(a), "w"(b), "w"(c)
6627 : /* No clobbers */);
6628 return result;
6631 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6632 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6634 float32x2_t result;
6635 float32x2_t t1;
6636 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
6637 : "=w"(result), "=w"(t1)
6638 : "0"(a), "w"(b), "w"(c)
6639 : /* No clobbers */);
6640 return result;
6643 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6644 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6646 int16x4_t result;
6647 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6648 : "=w"(result)
6649 : "0"(a), "w"(b), "x"(c)
6650 : /* No clobbers */);
6651 return result;
6654 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6655 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6657 int32x2_t result;
6658 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6659 : "=w"(result)
6660 : "0"(a), "w"(b), "w"(c)
6661 : /* No clobbers */);
6662 return result;
6665 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6666 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6668 uint16x4_t result;
6669 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6670 : "=w"(result)
6671 : "0"(a), "w"(b), "x"(c)
6672 : /* No clobbers */);
6673 return result;
6676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6677 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6679 uint32x2_t result;
6680 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6681 : "=w"(result)
6682 : "0"(a), "w"(b), "w"(c)
6683 : /* No clobbers */);
6684 return result;
6687 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6688 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6690 int8x8_t result;
6691 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6692 : "=w"(result)
6693 : "0"(a), "w"(b), "w"(c)
6694 : /* No clobbers */);
6695 return result;
6698 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6699 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6701 int16x4_t result;
6702 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6703 : "=w"(result)
6704 : "0"(a), "w"(b), "w"(c)
6705 : /* No clobbers */);
6706 return result;
6709 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6710 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6712 int32x2_t result;
6713 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6714 : "=w"(result)
6715 : "0"(a), "w"(b), "w"(c)
6716 : /* No clobbers */);
6717 return result;
6720 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6721 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6723 uint8x8_t result;
6724 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6725 : "=w"(result)
6726 : "0"(a), "w"(b), "w"(c)
6727 : /* No clobbers */);
6728 return result;
6731 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6732 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6734 uint16x4_t result;
6735 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6736 : "=w"(result)
6737 : "0"(a), "w"(b), "w"(c)
6738 : /* No clobbers */);
6739 return result;
6742 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6743 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6745 uint32x2_t result;
6746 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6747 : "=w"(result)
6748 : "0"(a), "w"(b), "w"(c)
6749 : /* No clobbers */);
6750 return result;
/* Widening multiply-subtract from the high half of B by lane D of C
   (SMLSL2/UMLSL2 by element).  Lane index must be an immediate.  */
#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
6865 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6866 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6868 int32x4_t result;
6869 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
6870 : "=w"(result)
6871 : "0"(a), "w"(b), "x"(c)
6872 : /* No clobbers */);
6873 return result;
6876 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6877 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6879 int64x2_t result;
6880 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
6881 : "=w"(result)
6882 : "0"(a), "w"(b), "w"(c)
6883 : /* No clobbers */);
6884 return result;
6887 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6888 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6890 uint32x4_t result;
6891 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
6892 : "=w"(result)
6893 : "0"(a), "w"(b), "x"(c)
6894 : /* No clobbers */);
6895 return result;
6898 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6899 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6901 uint64x2_t result;
6902 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
6903 : "=w"(result)
6904 : "0"(a), "w"(b), "w"(c)
6905 : /* No clobbers */);
6906 return result;
6909 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6910 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6912 int16x8_t result;
6913 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
6914 : "=w"(result)
6915 : "0"(a), "w"(b), "w"(c)
6916 : /* No clobbers */);
6917 return result;
6920 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6921 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6923 int32x4_t result;
6924 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
6925 : "=w"(result)
6926 : "0"(a), "w"(b), "w"(c)
6927 : /* No clobbers */);
6928 return result;
6931 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6932 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6934 int64x2_t result;
6935 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
6936 : "=w"(result)
6937 : "0"(a), "w"(b), "w"(c)
6938 : /* No clobbers */);
6939 return result;
6942 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6943 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6945 uint16x8_t result;
6946 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
6947 : "=w"(result)
6948 : "0"(a), "w"(b), "w"(c)
6949 : /* No clobbers */);
6950 return result;
6953 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6954 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6956 uint32x4_t result;
6957 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
6958 : "=w"(result)
6959 : "0"(a), "w"(b), "w"(c)
6960 : /* No clobbers */);
6961 return result;
6964 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6965 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6967 uint64x2_t result;
6968 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
6969 : "=w"(result)
6970 : "0"(a), "w"(b), "w"(c)
6971 : /* No clobbers */);
6972 return result;
/* Widening multiply-subtract by lane D of C (SMLSL/UMLSL by element).
   Lane index must be an immediate.  */
#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7087 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7088 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7090 int32x4_t result;
7091 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7092 : "=w"(result)
7093 : "0"(a), "w"(b), "x"(c)
7094 : /* No clobbers */);
7095 return result;
7098 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7099 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7101 int64x2_t result;
7102 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7103 : "=w"(result)
7104 : "0"(a), "w"(b), "w"(c)
7105 : /* No clobbers */);
7106 return result;
7109 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7110 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7112 uint32x4_t result;
7113 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7114 : "=w"(result)
7115 : "0"(a), "w"(b), "x"(c)
7116 : /* No clobbers */);
7117 return result;
/* vmlsl family: widening multiply-subtract.  Each lane computes
   a - (b * c) in the double-width element type (SMLSL/UMLSL).  The "_n"
   form broadcasts scalar C from vector element [0].  Operand %1 is tied
   to the accumulator A via the "0" matching constraint.  */

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsq_n_f32: result[i] = a[i] - b[i] * c.  There is no fused
   FMLS-by-scalar used here; instead a two-instruction template computes
   t1 = b * c[0] then result = a - t1 (unfused, matching the ARMv7
   vmls semantics).
   NOTE(review): T1 is a plain "=w" output written by the FMUL before the
   FSUB runs.  This is safe only because the FMUL consumes B and C in the
   same instruction that writes T1, and RESULT (tied to A) is a distinct
   output register; an earlyclobber "=&w" would state that more robustly
   — confirm before touching the constraints.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsq_n: quad-width multiply-subtract by scalar, result = a - b * c[0]
   (MLS by element).  16-bit element forms use the "x" constraint because
   by-element multiplies on .h lanes can only address registers V0-V15.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmlsq: quad-width vector multiply-subtract, result = a - b * c
   lane-wise (MLS).  Signed and unsigned forms emit the same instruction
   since modular multiply-subtract is sign-agnostic.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mls %0.16b,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mls %0.8h,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mls %0.4s,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* vmovl_high: widen the UPPER half of a 128-bit vector to double-width
   elements, implemented as a shift-left-long-by-zero (SSHLL2/USHLL2),
   which sign- or zero-extends respectively.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_high_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("sshll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_high_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("sshll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_high_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("sshll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_high_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("ushll2 %0.8h,%1.16b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_high_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("ushll2 %0.4s,%1.8h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_high_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("ushll2 %0.2d,%1.4s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmovl: widen a 64-bit vector to double-width elements via
   shift-left-long by #0 (SSHLL sign-extends, USHLL zero-extends).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovl_s8 (int8x8_t a)
{
  int16x8_t result;
  __asm__ ("sshll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovl_s16 (int16x4_t a)
{
  int32x4_t result;
  __asm__ ("sshll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmovl_s32 (int32x2_t a)
{
  int64x2_t result;
  __asm__ ("sshll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovl_u8 (uint8x8_t a)
{
  uint16x8_t result;
  __asm__ ("ushll %0.8h,%1.8b,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovl_u16 (uint16x4_t a)
{
  uint32x4_t result;
  __asm__ ("ushll %0.4s,%1.4h,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmovl_u32 (uint32x2_t a)
{
  uint64x2_t result;
  __asm__ ("ushll %0.2d,%1.2s,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmovn_high: narrow B and deposit it in the upper half of the result,
   keeping A as the lower half.  RESULT is pre-built as {a, 0} with
   vcombine so the "+w" read-write operand gives XTN2 the lower half to
   preserve while it writes the upper lanes.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.16b,%1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.8h,%1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("xtn2 %0.4s,%1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmovn: narrow each element to half width, discarding the upper bits
   (XTN - extract narrow).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmovn_s16 (int16x8_t a)
{
  int8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmovn_s32 (int32x4_t a)
{
  int16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmovn_s64 (int64x2_t a)
{
  int32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmovn_u16 (uint16x8_t a)
{
  uint8x8_t result;
  __asm__ ("xtn %0.8b,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmovn_u32 (uint32x4_t a)
{
  uint16x4_t result;
  __asm__ ("xtn %0.4h,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmovn_u64 (uint64x2_t a)
{
  uint32x2_t result;
  __asm__ ("xtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmul_n: multiply each lane of A by scalar B broadcast from element [0]
   (FMUL/MUL by element).  16-bit forms need "x" (registers V0-V15 only
   for .h lane indexing).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
  float32x2_t result;
  __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
  uint16x4_t result;
  __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
  uint32x2_t result;
  __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_high_lane: widening multiply of the UPPER half of A by lane C of
   the 64-bit vector B (SMULL2/UMULL2 by element).  Macros, not inline
   functions, because the lane number must be an assemble-time immediate
   ("i" constraint).  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq: as vmull_high_lane, but lane C is taken from a
   full 128-bit vector B.  */

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_n: widening multiply of the upper half of A by scalar B
   broadcast from element [0] (SMULL2/UMULL2 by element).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_n_s16 (int16x8_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_n_s32 (int32x4_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_n_u16 (uint16x8_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_n_u32 (uint32x4_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_high: widening multiply of the upper halves of two 128-bit
   vectors (PMULL2 polynomial / SMULL2 signed / UMULL2 unsigned).  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_high_p8 (poly8x16_t a, poly8x16_t b)
{
  poly16x8_t result;
  __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_high_s8 (int8x16_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_high_s16 (int16x8_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_high_s32 (int32x4_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_high_u8 (uint8x16_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_high_u16 (uint16x8_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_high_u32 (uint32x4_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull_lane: widening multiply of A by an immediate-selected lane of
   the 64-bit vector B (SMULL/UMULL by element).  */

#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_laneq: as vmull_lane, but the lane is selected from a 128-bit
   vector B.  */

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_n: widening multiply of A by scalar B broadcast from
   element [0] (SMULL/UMULL by element).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_n_s16 (int16x4_t a, int16_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_n_s32 (int32x2_t a, int32_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_n_u16 (uint16x4_t a, uint16_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_n_u32 (uint32x2_t a, uint32_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmull: widening multiply of two 64-bit vectors into a 128-bit result
   (PMULL polynomial / SMULL signed / UMULL unsigned).  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vmull_p8 (poly8x8_t a, poly8x8_t b)
{
  poly16x8_t result;
  __asm__ ("pmull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmull_s8 (int8x8_t a, int8x8_t b)
{
  int16x8_t result;
  __asm__ ("smull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmull_s16 (int16x4_t a, int16x4_t b)
{
  int32x4_t result;
  __asm__ ("smull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmull_s32 (int32x2_t a, int32x2_t b)
{
  int64x2_t result;
  __asm__ ("smull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmull_u8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t result;
  __asm__ ("umull %0.8h, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmull_u16 (uint16x4_t a, uint16x4_t b)
{
  uint32x4_t result;
  __asm__ ("umull %0.4s, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmull_u32 (uint32x2_t a, uint32x2_t b)
{
  uint64x2_t result;
  __asm__ ("umull %0.2d, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulq_n: quad-width multiply of A by scalar B broadcast from
   element [0] (FMUL/MUL by element).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
  float32x4_t result;
  __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t a, float64_t b)
{
  float64x2_t result;
  __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t a, uint16_t b)
{
  uint16x8_t result;
  __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t a, uint32_t b)
{
  uint32x4_t result;
  __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulx_f32: lane-wise FMULX — like FMUL except 0 * infinity returns
   +/-2.0 instead of NaN (IEEE "multiply extended").  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulx_lane_f32: FMULX of A by lane C of B.  Fix: ACLE declares the
   second argument of the "_lane" (as opposed to "_laneq") variant as a
   64-bit float32x2_t; the previous float32x4_t cast implemented laneq
   semantics under the lane name and rejected conforming callers.  */
#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulxd_f64: scalar double-precision FMULX.  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmulxd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fmulx %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulxq_f32: quad-width lane-wise FMULX.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulxq_f64: quad-width double-precision lane-wise FMULX.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmulxq_lane_f32: FMULX of quad A by lane C of B.  Fix: per ACLE the
   "_lane" variant selects from a 64-bit float32x2_t; the previous
   float32x4_t cast gave laneq semantics and rejected conforming
   callers.  A d-register lane index [0-1] still assembles fine.  */
#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulxq_lane_f64: FMULX of quad A by lane C of B.  Fix: per ACLE the
   "_lane" variant selects from a 64-bit float64x1_t; the previous
   float64x2_t cast gave laneq semantics and rejected conforming
   callers (only lane 0 exists in the _lane form).  */
#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulxs_f32: scalar single-precision FMULX.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmulxs_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fmulx %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vmvn: bitwise NOT of each lane (MVN).  All element widths use the
   .8b arrangement because the operation is bit-level.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmvn_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmvn_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmvn_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmvn_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmvn_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmvn_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("mvn %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vmvnq: quad-width bitwise NOT of each lane (MVN, .16b arrangement).  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmvnq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmvnq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmvnq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmvnq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmvnq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmvnq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmvnq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("mvn %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpadal: pairwise add-long and accumulate — each double-width lane of A
   gains the sum of an adjacent pair of lanes of B (SADALP/UADALP).  The
   accumulator is tied to operand %0 via the "0" constraint, so the
   template only spells out %0 and %2.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpadal_s8 (int16x4_t a, int8x8_t b)
{
  int16x4_t result;
  __asm__ ("sadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpadal_s16 (int32x2_t a, int16x4_t b)
{
  int32x2_t result;
  __asm__ ("sadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpadal_s32 (int64x1_t a, int32x2_t b)
{
  int64x1_t result;
  __asm__ ("sadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpadal_u8 (uint16x4_t a, uint8x8_t b)
{
  uint16x4_t result;
  __asm__ ("uadalp %0.4h,%2.8b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpadal_u16 (uint32x2_t a, uint16x4_t b)
{
  uint32x2_t result;
  __asm__ ("uadalp %0.2s,%2.4h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpadal_u32 (uint64x1_t a, uint32x2_t b)
{
  uint64x1_t result;
  __asm__ ("uadalp %0.1d,%2.2s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadalq: quad-width pairwise add-long and accumulate
   (SADALP/UADALP on 128-bit vectors).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpadalq_s8 (int16x8_t a, int8x16_t b)
{
  int16x8_t result;
  __asm__ ("sadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpadalq_s16 (int32x4_t a, int16x8_t b)
{
  int32x4_t result;
  __asm__ ("sadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpadalq_s32 (int64x2_t a, int32x4_t b)
{
  int64x2_t result;
  __asm__ ("sadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpadalq_u8 (uint16x8_t a, uint8x16_t b)
{
  uint16x8_t result;
  __asm__ ("uadalp %0.8h,%2.16b"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpadalq_u16 (uint32x4_t a, uint16x8_t b)
{
  uint32x4_t result;
  __asm__ ("uadalp %0.4s,%2.8h"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpadalq_u32 (uint64x2_t a, uint32x4_t b)
{
  uint64x2_t result;
  __asm__ ("uadalp %0.2d,%2.4s"
           : "=w"(result)
           : "0"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
8535 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8536 vpadd_f32 (float32x2_t a, float32x2_t b)
8538 float32x2_t result;
8539 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
8540 : "=w"(result)
8541 : "w"(a), "w"(b)
8542 : /* No clobbers */);
8543 return result;
8546 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8547 vpaddl_s8 (int8x8_t a)
8549 int16x4_t result;
8550 __asm__ ("saddlp %0.4h,%1.8b"
8551 : "=w"(result)
8552 : "w"(a)
8553 : /* No clobbers */);
8554 return result;
8557 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8558 vpaddl_s16 (int16x4_t a)
8560 int32x2_t result;
8561 __asm__ ("saddlp %0.2s,%1.4h"
8562 : "=w"(result)
8563 : "w"(a)
8564 : /* No clobbers */);
8565 return result;
8568 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8569 vpaddl_s32 (int32x2_t a)
8571 int64x1_t result;
8572 __asm__ ("saddlp %0.1d,%1.2s"
8573 : "=w"(result)
8574 : "w"(a)
8575 : /* No clobbers */);
8576 return result;
8579 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8580 vpaddl_u8 (uint8x8_t a)
8582 uint16x4_t result;
8583 __asm__ ("uaddlp %0.4h,%1.8b"
8584 : "=w"(result)
8585 : "w"(a)
8586 : /* No clobbers */);
8587 return result;
8590 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8591 vpaddl_u16 (uint16x4_t a)
8593 uint32x2_t result;
8594 __asm__ ("uaddlp %0.2s,%1.4h"
8595 : "=w"(result)
8596 : "w"(a)
8597 : /* No clobbers */);
8598 return result;
8601 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8602 vpaddl_u32 (uint32x2_t a)
8604 uint64x1_t result;
8605 __asm__ ("uaddlp %0.1d,%1.2s"
8606 : "=w"(result)
8607 : "w"(a)
8608 : /* No clobbers */);
8609 return result;
8612 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8613 vpaddlq_s8 (int8x16_t a)
8615 int16x8_t result;
8616 __asm__ ("saddlp %0.8h,%1.16b"
8617 : "=w"(result)
8618 : "w"(a)
8619 : /* No clobbers */);
8620 return result;
8623 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8624 vpaddlq_s16 (int16x8_t a)
8626 int32x4_t result;
8627 __asm__ ("saddlp %0.4s,%1.8h"
8628 : "=w"(result)
8629 : "w"(a)
8630 : /* No clobbers */);
8631 return result;
8634 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8635 vpaddlq_s32 (int32x4_t a)
8637 int64x2_t result;
8638 __asm__ ("saddlp %0.2d,%1.4s"
8639 : "=w"(result)
8640 : "w"(a)
8641 : /* No clobbers */);
8642 return result;
8645 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8646 vpaddlq_u8 (uint8x16_t a)
8648 uint16x8_t result;
8649 __asm__ ("uaddlp %0.8h,%1.16b"
8650 : "=w"(result)
8651 : "w"(a)
8652 : /* No clobbers */);
8653 return result;
8656 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8657 vpaddlq_u16 (uint16x8_t a)
8659 uint32x4_t result;
8660 __asm__ ("uaddlp %0.4s,%1.8h"
8661 : "=w"(result)
8662 : "w"(a)
8663 : /* No clobbers */);
8664 return result;
8667 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8668 vpaddlq_u32 (uint32x4_t a)
8670 uint64x2_t result;
8671 __asm__ ("uaddlp %0.2d,%1.4s"
8672 : "=w"(result)
8673 : "w"(a)
8674 : /* No clobbers */);
8675 return result;
8678 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8679 vpaddq_f32 (float32x4_t a, float32x4_t b)
8681 float32x4_t result;
8682 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
8683 : "=w"(result)
8684 : "w"(a), "w"(b)
8685 : /* No clobbers */);
8686 return result;
8689 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8690 vpaddq_f64 (float64x2_t a, float64x2_t b)
8692 float64x2_t result;
8693 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
8694 : "=w"(result)
8695 : "w"(a), "w"(b)
8696 : /* No clobbers */);
8697 return result;
8700 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8701 vpaddq_s8 (int8x16_t a, int8x16_t b)
8703 int8x16_t result;
8704 __asm__ ("addp %0.16b,%1.16b,%2.16b"
8705 : "=w"(result)
8706 : "w"(a), "w"(b)
8707 : /* No clobbers */);
8708 return result;
8711 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8712 vpaddq_s16 (int16x8_t a, int16x8_t b)
8714 int16x8_t result;
8715 __asm__ ("addp %0.8h,%1.8h,%2.8h"
8716 : "=w"(result)
8717 : "w"(a), "w"(b)
8718 : /* No clobbers */);
8719 return result;
8722 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8723 vpaddq_s32 (int32x4_t a, int32x4_t b)
8725 int32x4_t result;
8726 __asm__ ("addp %0.4s,%1.4s,%2.4s"
8727 : "=w"(result)
8728 : "w"(a), "w"(b)
8729 : /* No clobbers */);
8730 return result;
8733 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8734 vpaddq_s64 (int64x2_t a, int64x2_t b)
8736 int64x2_t result;
8737 __asm__ ("addp %0.2d,%1.2d,%2.2d"
8738 : "=w"(result)
8739 : "w"(a), "w"(b)
8740 : /* No clobbers */);
8741 return result;
8744 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8745 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
8747 uint8x16_t result;
8748 __asm__ ("addp %0.16b,%1.16b,%2.16b"
8749 : "=w"(result)
8750 : "w"(a), "w"(b)
8751 : /* No clobbers */);
8752 return result;
8755 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8756 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
8758 uint16x8_t result;
8759 __asm__ ("addp %0.8h,%1.8h,%2.8h"
8760 : "=w"(result)
8761 : "w"(a), "w"(b)
8762 : /* No clobbers */);
8763 return result;
8766 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8767 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
8769 uint32x4_t result;
8770 __asm__ ("addp %0.4s,%1.4s,%2.4s"
8771 : "=w"(result)
8772 : "w"(a), "w"(b)
8773 : /* No clobbers */);
8774 return result;
8777 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8778 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
8780 uint64x2_t result;
8781 __asm__ ("addp %0.2d,%1.2d,%2.2d"
8782 : "=w"(result)
8783 : "w"(a), "w"(b)
8784 : /* No clobbers */);
8785 return result;
8788 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8789 vpadds_f32 (float32x2_t a)
8791 float32_t result;
8792 __asm__ ("faddp %s0,%1.2s"
8793 : "=w"(result)
8794 : "w"(a)
8795 : /* No clobbers */);
8796 return result;
8799 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8800 vpmax_f32 (float32x2_t a, float32x2_t b)
8802 float32x2_t result;
8803 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
8804 : "=w"(result)
8805 : "w"(a), "w"(b)
8806 : /* No clobbers */);
8807 return result;
8810 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8811 vpmax_s8 (int8x8_t a, int8x8_t b)
8813 int8x8_t result;
8814 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
8815 : "=w"(result)
8816 : "w"(a), "w"(b)
8817 : /* No clobbers */);
8818 return result;
8821 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8822 vpmax_s16 (int16x4_t a, int16x4_t b)
8824 int16x4_t result;
8825 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
8826 : "=w"(result)
8827 : "w"(a), "w"(b)
8828 : /* No clobbers */);
8829 return result;
8832 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8833 vpmax_s32 (int32x2_t a, int32x2_t b)
8835 int32x2_t result;
8836 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
8837 : "=w"(result)
8838 : "w"(a), "w"(b)
8839 : /* No clobbers */);
8840 return result;
8843 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8844 vpmax_u8 (uint8x8_t a, uint8x8_t b)
8846 uint8x8_t result;
8847 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
8848 : "=w"(result)
8849 : "w"(a), "w"(b)
8850 : /* No clobbers */);
8851 return result;
8854 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8855 vpmax_u16 (uint16x4_t a, uint16x4_t b)
8857 uint16x4_t result;
8858 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
8859 : "=w"(result)
8860 : "w"(a), "w"(b)
8861 : /* No clobbers */);
8862 return result;
8865 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8866 vpmax_u32 (uint32x2_t a, uint32x2_t b)
8868 uint32x2_t result;
8869 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
8870 : "=w"(result)
8871 : "w"(a), "w"(b)
8872 : /* No clobbers */);
8873 return result;
8876 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8877 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
8879 float32x2_t result;
8880 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
8881 : "=w"(result)
8882 : "w"(a), "w"(b)
8883 : /* No clobbers */);
8884 return result;
8887 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8888 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
8890 float32x4_t result;
8891 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
8892 : "=w"(result)
8893 : "w"(a), "w"(b)
8894 : /* No clobbers */);
8895 return result;
8898 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8899 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
8901 float64x2_t result;
8902 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
8903 : "=w"(result)
8904 : "w"(a), "w"(b)
8905 : /* No clobbers */);
8906 return result;
8909 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8910 vpmaxnmqd_f64 (float64x2_t a)
8912 float64_t result;
8913 __asm__ ("fmaxnmp %d0,%1.2d"
8914 : "=w"(result)
8915 : "w"(a)
8916 : /* No clobbers */);
8917 return result;
8920 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8921 vpmaxnms_f32 (float32x2_t a)
8923 float32_t result;
8924 __asm__ ("fmaxnmp %s0,%1.2s"
8925 : "=w"(result)
8926 : "w"(a)
8927 : /* No clobbers */);
8928 return result;
8931 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8932 vpmaxq_f32 (float32x4_t a, float32x4_t b)
8934 float32x4_t result;
8935 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
8936 : "=w"(result)
8937 : "w"(a), "w"(b)
8938 : /* No clobbers */);
8939 return result;
8942 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8943 vpmaxq_f64 (float64x2_t a, float64x2_t b)
8945 float64x2_t result;
8946 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
8947 : "=w"(result)
8948 : "w"(a), "w"(b)
8949 : /* No clobbers */);
8950 return result;
8953 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8954 vpmaxq_s8 (int8x16_t a, int8x16_t b)
8956 int8x16_t result;
8957 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
8958 : "=w"(result)
8959 : "w"(a), "w"(b)
8960 : /* No clobbers */);
8961 return result;
8964 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8965 vpmaxq_s16 (int16x8_t a, int16x8_t b)
8967 int16x8_t result;
8968 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
8969 : "=w"(result)
8970 : "w"(a), "w"(b)
8971 : /* No clobbers */);
8972 return result;
8975 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8976 vpmaxq_s32 (int32x4_t a, int32x4_t b)
8978 int32x4_t result;
8979 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
8980 : "=w"(result)
8981 : "w"(a), "w"(b)
8982 : /* No clobbers */);
8983 return result;
8986 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8987 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
8989 uint8x16_t result;
8990 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
8991 : "=w"(result)
8992 : "w"(a), "w"(b)
8993 : /* No clobbers */);
8994 return result;
8997 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8998 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9000 uint16x8_t result;
9001 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9002 : "=w"(result)
9003 : "w"(a), "w"(b)
9004 : /* No clobbers */);
9005 return result;
9008 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9009 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9011 uint32x4_t result;
9012 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9013 : "=w"(result)
9014 : "w"(a), "w"(b)
9015 : /* No clobbers */);
9016 return result;
9019 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9020 vpmaxqd_f64 (float64x2_t a)
9022 float64_t result;
9023 __asm__ ("fmaxp %d0,%1.2d"
9024 : "=w"(result)
9025 : "w"(a)
9026 : /* No clobbers */);
9027 return result;
9030 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9031 vpmaxs_f32 (float32x2_t a)
9033 float32_t result;
9034 __asm__ ("fmaxp %s0,%1.2s"
9035 : "=w"(result)
9036 : "w"(a)
9037 : /* No clobbers */);
9038 return result;
9041 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9042 vpmin_f32 (float32x2_t a, float32x2_t b)
9044 float32x2_t result;
9045 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9046 : "=w"(result)
9047 : "w"(a), "w"(b)
9048 : /* No clobbers */);
9049 return result;
9052 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9053 vpmin_s8 (int8x8_t a, int8x8_t b)
9055 int8x8_t result;
9056 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9057 : "=w"(result)
9058 : "w"(a), "w"(b)
9059 : /* No clobbers */);
9060 return result;
9063 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9064 vpmin_s16 (int16x4_t a, int16x4_t b)
9066 int16x4_t result;
9067 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9068 : "=w"(result)
9069 : "w"(a), "w"(b)
9070 : /* No clobbers */);
9071 return result;
9074 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9075 vpmin_s32 (int32x2_t a, int32x2_t b)
9077 int32x2_t result;
9078 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9079 : "=w"(result)
9080 : "w"(a), "w"(b)
9081 : /* No clobbers */);
9082 return result;
9085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9086 vpmin_u8 (uint8x8_t a, uint8x8_t b)
9088 uint8x8_t result;
9089 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9090 : "=w"(result)
9091 : "w"(a), "w"(b)
9092 : /* No clobbers */);
9093 return result;
9096 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9097 vpmin_u16 (uint16x4_t a, uint16x4_t b)
9099 uint16x4_t result;
9100 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9101 : "=w"(result)
9102 : "w"(a), "w"(b)
9103 : /* No clobbers */);
9104 return result;
9107 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9108 vpmin_u32 (uint32x2_t a, uint32x2_t b)
9110 uint32x2_t result;
9111 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9112 : "=w"(result)
9113 : "w"(a), "w"(b)
9114 : /* No clobbers */);
9115 return result;
9118 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9119 vpminnm_f32 (float32x2_t a, float32x2_t b)
9121 float32x2_t result;
9122 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9123 : "=w"(result)
9124 : "w"(a), "w"(b)
9125 : /* No clobbers */);
9126 return result;
9129 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9130 vpminnmq_f32 (float32x4_t a, float32x4_t b)
9132 float32x4_t result;
9133 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9134 : "=w"(result)
9135 : "w"(a), "w"(b)
9136 : /* No clobbers */);
9137 return result;
9140 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9141 vpminnmq_f64 (float64x2_t a, float64x2_t b)
9143 float64x2_t result;
9144 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9145 : "=w"(result)
9146 : "w"(a), "w"(b)
9147 : /* No clobbers */);
9148 return result;
9151 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9152 vpminnmqd_f64 (float64x2_t a)
9154 float64_t result;
9155 __asm__ ("fminnmp %d0,%1.2d"
9156 : "=w"(result)
9157 : "w"(a)
9158 : /* No clobbers */);
9159 return result;
9162 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9163 vpminnms_f32 (float32x2_t a)
9165 float32_t result;
9166 __asm__ ("fminnmp %s0,%1.2s"
9167 : "=w"(result)
9168 : "w"(a)
9169 : /* No clobbers */);
9170 return result;
9173 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9174 vpminq_f32 (float32x4_t a, float32x4_t b)
9176 float32x4_t result;
9177 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
9178 : "=w"(result)
9179 : "w"(a), "w"(b)
9180 : /* No clobbers */);
9181 return result;
9184 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9185 vpminq_f64 (float64x2_t a, float64x2_t b)
9187 float64x2_t result;
9188 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
9189 : "=w"(result)
9190 : "w"(a), "w"(b)
9191 : /* No clobbers */);
9192 return result;
9195 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9196 vpminq_s8 (int8x16_t a, int8x16_t b)
9198 int8x16_t result;
9199 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
9200 : "=w"(result)
9201 : "w"(a), "w"(b)
9202 : /* No clobbers */);
9203 return result;
9206 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9207 vpminq_s16 (int16x8_t a, int16x8_t b)
9209 int16x8_t result;
9210 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
9211 : "=w"(result)
9212 : "w"(a), "w"(b)
9213 : /* No clobbers */);
9214 return result;
9217 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9218 vpminq_s32 (int32x4_t a, int32x4_t b)
9220 int32x4_t result;
9221 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
9222 : "=w"(result)
9223 : "w"(a), "w"(b)
9224 : /* No clobbers */);
9225 return result;
9228 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9229 vpminq_u8 (uint8x16_t a, uint8x16_t b)
9231 uint8x16_t result;
9232 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
9233 : "=w"(result)
9234 : "w"(a), "w"(b)
9235 : /* No clobbers */);
9236 return result;
9239 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9240 vpminq_u16 (uint16x8_t a, uint16x8_t b)
9242 uint16x8_t result;
9243 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
9244 : "=w"(result)
9245 : "w"(a), "w"(b)
9246 : /* No clobbers */);
9247 return result;
9250 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9251 vpminq_u32 (uint32x4_t a, uint32x4_t b)
9253 uint32x4_t result;
9254 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
9255 : "=w"(result)
9256 : "w"(a), "w"(b)
9257 : /* No clobbers */);
9258 return result;
9261 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9262 vpminqd_f64 (float64x2_t a)
9264 float64_t result;
9265 __asm__ ("fminp %d0,%1.2d"
9266 : "=w"(result)
9267 : "w"(a)
9268 : /* No clobbers */);
9269 return result;
9272 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9273 vpmins_f32 (float32x2_t a)
9275 float32_t result;
9276 __asm__ ("fminp %s0,%1.2s"
9277 : "=w"(result)
9278 : "w"(a)
9279 : /* No clobbers */);
9280 return result;
9283 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9284 vqdmulh_n_s16 (int16x4_t a, int16_t b)
9286 int16x4_t result;
9287 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
9288 : "=w"(result)
9289 : "w"(a), "x"(b)
9290 : /* No clobbers */);
9291 return result;
9294 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9295 vqdmulh_n_s32 (int32x2_t a, int32_t b)
9297 int32x2_t result;
9298 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
9299 : "=w"(result)
9300 : "w"(a), "w"(b)
9301 : /* No clobbers */);
9302 return result;
9305 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9306 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
9308 int16x8_t result;
9309 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
9310 : "=w"(result)
9311 : "w"(a), "x"(b)
9312 : /* No clobbers */);
9313 return result;
9316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9317 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
9319 int32x4_t result;
9320 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
9321 : "=w"(result)
9322 : "w"(a), "w"(b)
9323 : /* No clobbers */);
9324 return result;
9327 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9328 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
9330 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
9331 __asm__ ("sqxtn2 %0.16b, %1.8h"
9332 : "+w"(result)
9333 : "w"(b)
9334 : /* No clobbers */);
9335 return result;
9338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9339 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
9341 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
9342 __asm__ ("sqxtn2 %0.8h, %1.4s"
9343 : "+w"(result)
9344 : "w"(b)
9345 : /* No clobbers */);
9346 return result;
9349 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9350 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
9352 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
9353 __asm__ ("sqxtn2 %0.4s, %1.2d"
9354 : "+w"(result)
9355 : "w"(b)
9356 : /* No clobbers */);
9357 return result;
9360 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9361 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
9363 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9364 __asm__ ("uqxtn2 %0.16b, %1.8h"
9365 : "+w"(result)
9366 : "w"(b)
9367 : /* No clobbers */);
9368 return result;
9371 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9372 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
9374 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
9375 __asm__ ("uqxtn2 %0.8h, %1.4s"
9376 : "+w"(result)
9377 : "w"(b)
9378 : /* No clobbers */);
9379 return result;
9382 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9383 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
9385 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
9386 __asm__ ("uqxtn2 %0.4s, %1.2d"
9387 : "+w"(result)
9388 : "w"(b)
9389 : /* No clobbers */);
9390 return result;
9393 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9394 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
9396 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9397 __asm__ ("sqxtun2 %0.16b, %1.8h"
9398 : "+w"(result)
9399 : "w"(b)
9400 : /* No clobbers */);
9401 return result;
9404 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9405 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
9407 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
9408 __asm__ ("sqxtun2 %0.8h, %1.4s"
9409 : "+w"(result)
9410 : "w"(b)
9411 : /* No clobbers */);
9412 return result;
9415 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9416 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
9418 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
9419 __asm__ ("sqxtun2 %0.4s, %1.2d"
9420 : "+w"(result)
9421 : "w"(b)
9422 : /* No clobbers */);
9423 return result;
9426 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9427 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
9429 int16x4_t result;
9430 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
9431 : "=w"(result)
9432 : "w"(a), "x"(b)
9433 : /* No clobbers */);
9434 return result;
9437 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9438 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
9440 int32x2_t result;
9441 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
9442 : "=w"(result)
9443 : "w"(a), "w"(b)
9444 : /* No clobbers */);
9445 return result;
9448 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9449 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
9451 int16x8_t result;
9452 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
9453 : "=w"(result)
9454 : "w"(a), "x"(b)
9455 : /* No clobbers */);
9456 return result;
9459 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9460 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
9462 int32x4_t result;
9463 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
9464 : "=w"(result)
9465 : "w"(a), "w"(b)
9466 : /* No clobbers */);
9467 return result;
/* vqrshrn_high_n_*: saturating rounding shift right narrow into the high
   half (SQRSHRN2/UQRSHRN2).  Macros rather than functions because the
   shift amount must be an immediate ("i" constraint).  Statement-
   expression locals are __-prefixed so user macros cannot clobber them.  */

#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int8x8_t __a = (a);                                              \
       int8x16_t __result = vcombine_s8                                 \
                              (__a, vcreate_s8                          \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int16x8_t __result = vcombine_s16                                \
                              (__a, vcreate_s16                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int32x4_t __result = vcombine_s32                                \
                              (__a, vcreate_s32                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint8x8_t __a = (a);                                             \
       uint8x16_t __result = vcombine_u8                                \
                               (__a, vcreate_u8                         \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint16x8_t __result = vcombine_u16                               \
                               (__a, vcreate_u16                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint32x4_t __result = vcombine_u32                               \
                               (__a, vcreate_u32                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vqrshrun_high_n_*: saturating rounding shift right unsigned narrow into
   the high half (SQRSHRUN2).  Macro form for the immediate shift amount;
   __-prefixed locals avoid user-macro clashes.  */

#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       uint8x8_t __a = (a);                                             \
       uint8x16_t __result = vcombine_u8                                \
                               (__a, vcreate_u8                         \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       uint16x4_t __a = (a);                                            \
       uint16x8_t __result = vcombine_u16                               \
                               (__a, vcreate_u16                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       uint32x2_t __a = (a);                                            \
       uint32x4_t __result = vcombine_u32                               \
                               (__a, vcreate_u32                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vqshrn_high_n_*: saturating shift right narrow into the high half
   (SQSHRN2/UQSHRN2).  Macro form for the immediate shift amount;
   __-prefixed locals avoid user-macro clashes.  */

#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int8x8_t __a = (a);                                              \
       int8x16_t __result = vcombine_s8                                 \
                              (__a, vcreate_s8                          \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int16x8_t __result = vcombine_s16                                \
                              (__a, vcreate_s16                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int32x4_t __result = vcombine_s32                                \
                              (__a, vcreate_s32                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint8x8_t __a = (a);                                             \
       uint8x16_t __result = vcombine_u8                                \
                               (__a, vcreate_u8                         \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint16x8_t __result = vcombine_u16                               \
                               (__a, vcreate_u16                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint32x4_t __result = vcombine_u32                               \
                               (__a, vcreate_u32                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vqshrun_high_n_*: saturating shift right unsigned narrow into the high
   half (SQSHRUN2).  Macro form for the immediate shift amount;
   __-prefixed locals avoid user-macro clashes.  */

#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       uint8x8_t __a = (a);                                             \
       uint8x16_t __result = vcombine_u8                                \
                               (__a, vcreate_u8                         \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       uint16x4_t __a = (a);                                            \
       uint16x8_t __result = vcombine_u16                               \
                               (__a, vcreate_u16                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       uint32x2_t __a = (a);                                            \
       uint32x4_t __result = vcombine_u32                               \
                               (__a, vcreate_u32                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vrshrn_high_n_*: rounding shift right narrow into the high half
   (RSHRN2).  Macro form for the immediate shift amount; __-prefixed
   locals avoid user-macro clashes.  */

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __b = (b);                                             \
       int8x8_t __a = (a);                                              \
       int8x16_t __result = vcombine_s8                                 \
                              (__a, vcreate_s8                          \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __b = (b);                                             \
       int16x4_t __a = (a);                                             \
       int16x8_t __result = vcombine_s16                                \
                              (__a, vcreate_s16                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __b = (b);                                             \
       int32x2_t __a = (a);                                             \
       int32x4_t __result = vcombine_s32                                \
                              (__a, vcreate_s32                         \
                                      (__AARCH64_UINT64_C (0x0)));      \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __b = (b);                                            \
       uint8x8_t __a = (a);                                             \
       uint8x16_t __result = vcombine_u8                                \
                               (__a, vcreate_u8                         \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __b = (b);                                            \
       uint16x4_t __a = (a);                                            \
       uint16x8_t __result = vcombine_u16                               \
                               (__a, vcreate_u16                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __b = (b);                                            \
       uint32x2_t __a = (a);                                            \
       uint32x4_t __result = vcombine_u32                               \
                               (__a, vcreate_u32                        \
                                       (__AARCH64_UINT64_C (0x0)));     \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(__result)                                        \
                : "w"(__b), "i"(c)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vrshrn_n_{s16,s32,s64,u16,u32,u64}: rounding shift right by constant
   `b' and narrow each lane to half width (RSHRN), producing a 64-bit
   vector.  `b' must be a compile-time constant ("i" constraint).  */
9830 #define vrshrn_n_s16(a, b) \
9831 __extension__ \
9832 ({ \
9833 int16x8_t a_ = (a); \
9834 int8x8_t result; \
9835 __asm__ ("rshrn %0.8b,%1.8h,%2" \
9836 : "=w"(result) \
9837 : "w"(a_), "i"(b) \
9838 : /* No clobbers */); \
9839 result; \
9842 #define vrshrn_n_s32(a, b) \
9843 __extension__ \
9844 ({ \
9845 int32x4_t a_ = (a); \
9846 int16x4_t result; \
9847 __asm__ ("rshrn %0.4h,%1.4s,%2" \
9848 : "=w"(result) \
9849 : "w"(a_), "i"(b) \
9850 : /* No clobbers */); \
9851 result; \
9854 #define vrshrn_n_s64(a, b) \
9855 __extension__ \
9856 ({ \
9857 int64x2_t a_ = (a); \
9858 int32x2_t result; \
9859 __asm__ ("rshrn %0.2s,%1.2d,%2" \
9860 : "=w"(result) \
9861 : "w"(a_), "i"(b) \
9862 : /* No clobbers */); \
9863 result; \
9866 #define vrshrn_n_u16(a, b) \
9867 __extension__ \
9868 ({ \
9869 uint16x8_t a_ = (a); \
9870 uint8x8_t result; \
9871 __asm__ ("rshrn %0.8b,%1.8h,%2" \
9872 : "=w"(result) \
9873 : "w"(a_), "i"(b) \
9874 : /* No clobbers */); \
9875 result; \
9878 #define vrshrn_n_u32(a, b) \
9879 __extension__ \
9880 ({ \
9881 uint32x4_t a_ = (a); \
9882 uint16x4_t result; \
9883 __asm__ ("rshrn %0.4h,%1.4s,%2" \
9884 : "=w"(result) \
9885 : "w"(a_), "i"(b) \
9886 : /* No clobbers */); \
9887 result; \
9890 #define vrshrn_n_u64(a, b) \
9891 __extension__ \
9892 ({ \
9893 uint64x2_t a_ = (a); \
9894 uint32x2_t result; \
9895 __asm__ ("rshrn %0.2s,%1.2d,%2" \
9896 : "=w"(result) \
9897 : "w"(a_), "i"(b) \
9898 : /* No clobbers */); \
9899 result; \
/* Reciprocal square-root ESTIMATE intrinsics.  Floating-point variants
   emit FRSQRTE, the u32 variants URSQRTE; scalar forms (vrsqrted_f64,
   vrsqrtes_f32) operate on a single d/s register lane.  These return an
   approximation only — callers refine it with vrsqrts* Newton steps.
   NOTE(review): the `{`/`}` of each function body are not visible in
   this extract.  */
9902 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9903 vrsqrte_f32 (float32x2_t a)
9905 float32x2_t result;
9906 __asm__ ("frsqrte %0.2s,%1.2s"
9907 : "=w"(result)
9908 : "w"(a)
9909 : /* No clobbers */);
9910 return result;
9913 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
9914 vrsqrte_f64 (float64x1_t a)
9916 float64x1_t result;
9917 __asm__ ("frsqrte %d0,%d1"
9918 : "=w"(result)
9919 : "w"(a)
9920 : /* No clobbers */);
9921 return result;
9924 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9925 vrsqrte_u32 (uint32x2_t a)
9927 uint32x2_t result;
9928 __asm__ ("ursqrte %0.2s,%1.2s"
9929 : "=w"(result)
9930 : "w"(a)
9931 : /* No clobbers */);
9932 return result;
9935 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9936 vrsqrted_f64 (float64_t a)
9938 float64_t result;
9939 __asm__ ("frsqrte %d0,%d1"
9940 : "=w"(result)
9941 : "w"(a)
9942 : /* No clobbers */);
9943 return result;
9946 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9947 vrsqrteq_f32 (float32x4_t a)
9949 float32x4_t result;
9950 __asm__ ("frsqrte %0.4s,%1.4s"
9951 : "=w"(result)
9952 : "w"(a)
9953 : /* No clobbers */);
9954 return result;
9957 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9958 vrsqrteq_f64 (float64x2_t a)
9960 float64x2_t result;
9961 __asm__ ("frsqrte %0.2d,%1.2d"
9962 : "=w"(result)
9963 : "w"(a)
9964 : /* No clobbers */);
9965 return result;
9968 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9969 vrsqrteq_u32 (uint32x4_t a)
9971 uint32x4_t result;
9972 __asm__ ("ursqrte %0.4s,%1.4s"
9973 : "=w"(result)
9974 : "w"(a)
9975 : /* No clobbers */);
9976 return result;
9979 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9980 vrsqrtes_f32 (float32_t a)
9982 float32_t result;
9983 __asm__ ("frsqrte %s0,%s1"
9984 : "=w"(result)
9985 : "w"(a)
9986 : /* No clobbers */);
9987 return result;
/* Reciprocal square-root STEP intrinsics (FRSQRTS): one Newton-Raphson
   refinement step used together with the vrsqrte* estimates above.
   Vector forms take two vectors; vrsqrtsd_f64/vrsqrtss_f32 are the
   scalar d/s-register forms.  */
9990 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9991 vrsqrts_f32 (float32x2_t a, float32x2_t b)
9993 float32x2_t result;
9994 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
9995 : "=w"(result)
9996 : "w"(a), "w"(b)
9997 : /* No clobbers */);
9998 return result;
10001 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10002 vrsqrtsd_f64 (float64_t a, float64_t b)
10004 float64_t result;
10005 __asm__ ("frsqrts %d0,%d1,%d2"
10006 : "=w"(result)
10007 : "w"(a), "w"(b)
10008 : /* No clobbers */);
10009 return result;
10012 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10013 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
10015 float32x4_t result;
10016 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
10017 : "=w"(result)
10018 : "w"(a), "w"(b)
10019 : /* No clobbers */);
10020 return result;
10023 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10024 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
10026 float64x2_t result;
10027 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
10028 : "=w"(result)
10029 : "w"(a), "w"(b)
10030 : /* No clobbers */);
10031 return result;
10034 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10035 vrsqrtss_f32 (float32_t a, float32_t b)
10037 float32_t result;
10038 __asm__ ("frsqrts %s0,%s1,%s2"
10039 : "=w"(result)
10040 : "w"(a), "w"(b)
10041 : /* No clobbers */);
10042 return result;
/* vshrn_high_n_{s16,s32,s64,u16,u32,u64}: plain (truncating) shift-
   right-narrow into the HIGH half via SHRN2, preserving the low half
   `a' through the "+w" tied operand.  Same structure as the rounding
   vrshrn_high_n_* family above; `c' must be a compile-time constant.  */
10045 #define vshrn_high_n_s16(a, b, c) \
10046 __extension__ \
10047 ({ \
10048 int16x8_t b_ = (b); \
10049 int8x8_t a_ = (a); \
10050 int8x16_t result = vcombine_s8 \
10051 (a_, vcreate_s8 \
10052 (__AARCH64_UINT64_C (0x0))); \
10053 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
10054 : "+w"(result) \
10055 : "w"(b_), "i"(c) \
10056 : /* No clobbers */); \
10057 result; \
10060 #define vshrn_high_n_s32(a, b, c) \
10061 __extension__ \
10062 ({ \
10063 int32x4_t b_ = (b); \
10064 int16x4_t a_ = (a); \
10065 int16x8_t result = vcombine_s16 \
10066 (a_, vcreate_s16 \
10067 (__AARCH64_UINT64_C (0x0))); \
10068 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
10069 : "+w"(result) \
10070 : "w"(b_), "i"(c) \
10071 : /* No clobbers */); \
10072 result; \
10075 #define vshrn_high_n_s64(a, b, c) \
10076 __extension__ \
10077 ({ \
10078 int64x2_t b_ = (b); \
10079 int32x2_t a_ = (a); \
10080 int32x4_t result = vcombine_s32 \
10081 (a_, vcreate_s32 \
10082 (__AARCH64_UINT64_C (0x0))); \
10083 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
10084 : "+w"(result) \
10085 : "w"(b_), "i"(c) \
10086 : /* No clobbers */); \
10087 result; \
10090 #define vshrn_high_n_u16(a, b, c) \
10091 __extension__ \
10092 ({ \
10093 uint16x8_t b_ = (b); \
10094 uint8x8_t a_ = (a); \
10095 uint8x16_t result = vcombine_u8 \
10096 (a_, vcreate_u8 \
10097 (__AARCH64_UINT64_C (0x0))); \
10098 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
10099 : "+w"(result) \
10100 : "w"(b_), "i"(c) \
10101 : /* No clobbers */); \
10102 result; \
10105 #define vshrn_high_n_u32(a, b, c) \
10106 __extension__ \
10107 ({ \
10108 uint32x4_t b_ = (b); \
10109 uint16x4_t a_ = (a); \
10110 uint16x8_t result = vcombine_u16 \
10111 (a_, vcreate_u16 \
10112 (__AARCH64_UINT64_C (0x0))); \
10113 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
10114 : "+w"(result) \
10115 : "w"(b_), "i"(c) \
10116 : /* No clobbers */); \
10117 result; \
10120 #define vshrn_high_n_u64(a, b, c) \
10121 __extension__ \
10122 ({ \
10123 uint64x2_t b_ = (b); \
10124 uint32x2_t a_ = (a); \
10125 uint32x4_t result = vcombine_u32 \
10126 (a_, vcreate_u32 \
10127 (__AARCH64_UINT64_C (0x0))); \
10128 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
10129 : "+w"(result) \
10130 : "w"(b_), "i"(c) \
10131 : /* No clobbers */); \
10132 result; \
/* vshrn_n_{s16,s32,s64,u16,u32,u64}: truncating shift right by constant
   `b' and narrow each lane to half width (SHRN), yielding a 64-bit
   vector.  `b' must be a compile-time constant.  */
10135 #define vshrn_n_s16(a, b) \
10136 __extension__ \
10137 ({ \
10138 int16x8_t a_ = (a); \
10139 int8x8_t result; \
10140 __asm__ ("shrn %0.8b,%1.8h,%2" \
10141 : "=w"(result) \
10142 : "w"(a_), "i"(b) \
10143 : /* No clobbers */); \
10144 result; \
10147 #define vshrn_n_s32(a, b) \
10148 __extension__ \
10149 ({ \
10150 int32x4_t a_ = (a); \
10151 int16x4_t result; \
10152 __asm__ ("shrn %0.4h,%1.4s,%2" \
10153 : "=w"(result) \
10154 : "w"(a_), "i"(b) \
10155 : /* No clobbers */); \
10156 result; \
10159 #define vshrn_n_s64(a, b) \
10160 __extension__ \
10161 ({ \
10162 int64x2_t a_ = (a); \
10163 int32x2_t result; \
10164 __asm__ ("shrn %0.2s,%1.2d,%2" \
10165 : "=w"(result) \
10166 : "w"(a_), "i"(b) \
10167 : /* No clobbers */); \
10168 result; \
10171 #define vshrn_n_u16(a, b) \
10172 __extension__ \
10173 ({ \
10174 uint16x8_t a_ = (a); \
10175 uint8x8_t result; \
10176 __asm__ ("shrn %0.8b,%1.8h,%2" \
10177 : "=w"(result) \
10178 : "w"(a_), "i"(b) \
10179 : /* No clobbers */); \
10180 result; \
10183 #define vshrn_n_u32(a, b) \
10184 __extension__ \
10185 ({ \
10186 uint32x4_t a_ = (a); \
10187 uint16x4_t result; \
10188 __asm__ ("shrn %0.4h,%1.4s,%2" \
10189 : "=w"(result) \
10190 : "w"(a_), "i"(b) \
10191 : /* No clobbers */); \
10192 result; \
10195 #define vshrn_n_u64(a, b) \
10196 __extension__ \
10197 ({ \
10198 uint64x2_t a_ = (a); \
10199 uint32x2_t result; \
10200 __asm__ ("shrn %0.2s,%1.2d,%2" \
10201 : "=w"(result) \
10202 : "w"(a_), "i"(b) \
10203 : /* No clobbers */); \
10204 result; \
/* vsli_n / vsliq_n (polynomial variants): shift left `b' by constant `c'
   and INSERT into `a' (SLI keeps the low `c' bits of each lane of `a').
   The "0" constraint ties the destination to `a_' so the inserted-into
   value is the accumulator.  `c' must be a compile-time constant.  */
10207 #define vsli_n_p8(a, b, c) \
10208 __extension__ \
10209 ({ \
10210 poly8x8_t b_ = (b); \
10211 poly8x8_t a_ = (a); \
10212 poly8x8_t result; \
10213 __asm__ ("sli %0.8b,%2.8b,%3" \
10214 : "=w"(result) \
10215 : "0"(a_), "w"(b_), "i"(c) \
10216 : /* No clobbers */); \
10217 result; \
10220 #define vsli_n_p16(a, b, c) \
10221 __extension__ \
10222 ({ \
10223 poly16x4_t b_ = (b); \
10224 poly16x4_t a_ = (a); \
10225 poly16x4_t result; \
10226 __asm__ ("sli %0.4h,%2.4h,%3" \
10227 : "=w"(result) \
10228 : "0"(a_), "w"(b_), "i"(c) \
10229 : /* No clobbers */); \
10230 result; \
10233 #define vsliq_n_p8(a, b, c) \
10234 __extension__ \
10235 ({ \
10236 poly8x16_t b_ = (b); \
10237 poly8x16_t a_ = (a); \
10238 poly8x16_t result; \
10239 __asm__ ("sli %0.16b,%2.16b,%3" \
10240 : "=w"(result) \
10241 : "0"(a_), "w"(b_), "i"(c) \
10242 : /* No clobbers */); \
10243 result; \
10246 #define vsliq_n_p16(a, b, c) \
10247 __extension__ \
10248 ({ \
10249 poly16x8_t b_ = (b); \
10250 poly16x8_t a_ = (a); \
10251 poly16x8_t result; \
10252 __asm__ ("sli %0.8h,%2.8h,%3" \
10253 : "=w"(result) \
10254 : "0"(a_), "w"(b_), "i"(c) \
10255 : /* No clobbers */); \
10256 result; \
/* vsri_n / vsriq_n (polynomial variants): shift right `b' by constant
   `c' and INSERT into `a' (SRI keeps the high `c' bits of each lane of
   `a').  Mirror image of the vsli_n_* family above; same tied-operand
   scheme and compile-time-constant requirement on `c'.  */
10259 #define vsri_n_p8(a, b, c) \
10260 __extension__ \
10261 ({ \
10262 poly8x8_t b_ = (b); \
10263 poly8x8_t a_ = (a); \
10264 poly8x8_t result; \
10265 __asm__ ("sri %0.8b,%2.8b,%3" \
10266 : "=w"(result) \
10267 : "0"(a_), "w"(b_), "i"(c) \
10268 : /* No clobbers */); \
10269 result; \
10272 #define vsri_n_p16(a, b, c) \
10273 __extension__ \
10274 ({ \
10275 poly16x4_t b_ = (b); \
10276 poly16x4_t a_ = (a); \
10277 poly16x4_t result; \
10278 __asm__ ("sri %0.4h,%2.4h,%3" \
10279 : "=w"(result) \
10280 : "0"(a_), "w"(b_), "i"(c) \
10281 : /* No clobbers */); \
10282 result; \
10285 #define vsriq_n_p8(a, b, c) \
10286 __extension__ \
10287 ({ \
10288 poly8x16_t b_ = (b); \
10289 poly8x16_t a_ = (a); \
10290 poly8x16_t result; \
10291 __asm__ ("sri %0.16b,%2.16b,%3" \
10292 : "=w"(result) \
10293 : "0"(a_), "w"(b_), "i"(c) \
10294 : /* No clobbers */); \
10295 result; \
10298 #define vsriq_n_p16(a, b, c) \
10299 __extension__ \
10300 ({ \
10301 poly16x8_t b_ = (b); \
10302 poly16x8_t a_ = (a); \
10303 poly16x8_t result; \
10304 __asm__ ("sri %0.8h,%2.8h,%3" \
10305 : "=w"(result) \
10306 : "0"(a_), "w"(b_), "i"(c) \
10307 : /* No clobbers */); \
10308 result; \
/* vst1_lane_* (64-bit register variants): store lane `c' of vector `b'
   to memory at `a' with ST1.  The asm declares a "memory" clobber so the
   compiler does not cache the stored-to location; `c' must be a
   compile-time constant lane index.
   NOTE(review): each asm's (empty) output-operand line is not visible
   in this extract — the constraint lists shown start at the inputs.  */
10311 #define vst1_lane_f32(a, b, c) \
10312 __extension__ \
10313 ({ \
10314 float32x2_t b_ = (b); \
10315 float32_t * a_ = (a); \
10316 __asm__ ("st1 {%1.s}[%2],[%0]" \
10318 : "r"(a_), "w"(b_), "i"(c) \
10319 : "memory"); \
10322 #define vst1_lane_f64(a, b, c) \
10323 __extension__ \
10324 ({ \
10325 float64x1_t b_ = (b); \
10326 float64_t * a_ = (a); \
10327 __asm__ ("st1 {%1.d}[%2],[%0]" \
10329 : "r"(a_), "w"(b_), "i"(c) \
10330 : "memory"); \
10333 #define vst1_lane_p8(a, b, c) \
10334 __extension__ \
10335 ({ \
10336 poly8x8_t b_ = (b); \
10337 poly8_t * a_ = (a); \
10338 __asm__ ("st1 {%1.b}[%2],[%0]" \
10340 : "r"(a_), "w"(b_), "i"(c) \
10341 : "memory"); \
10344 #define vst1_lane_p16(a, b, c) \
10345 __extension__ \
10346 ({ \
10347 poly16x4_t b_ = (b); \
10348 poly16_t * a_ = (a); \
10349 __asm__ ("st1 {%1.h}[%2],[%0]" \
10351 : "r"(a_), "w"(b_), "i"(c) \
10352 : "memory"); \
10355 #define vst1_lane_s8(a, b, c) \
10356 __extension__ \
10357 ({ \
10358 int8x8_t b_ = (b); \
10359 int8_t * a_ = (a); \
10360 __asm__ ("st1 {%1.b}[%2],[%0]" \
10362 : "r"(a_), "w"(b_), "i"(c) \
10363 : "memory"); \
10366 #define vst1_lane_s16(a, b, c) \
10367 __extension__ \
10368 ({ \
10369 int16x4_t b_ = (b); \
10370 int16_t * a_ = (a); \
10371 __asm__ ("st1 {%1.h}[%2],[%0]" \
10373 : "r"(a_), "w"(b_), "i"(c) \
10374 : "memory"); \
10377 #define vst1_lane_s32(a, b, c) \
10378 __extension__ \
10379 ({ \
10380 int32x2_t b_ = (b); \
10381 int32_t * a_ = (a); \
10382 __asm__ ("st1 {%1.s}[%2],[%0]" \
10384 : "r"(a_), "w"(b_), "i"(c) \
10385 : "memory"); \
10388 #define vst1_lane_s64(a, b, c) \
10389 __extension__ \
10390 ({ \
10391 int64x1_t b_ = (b); \
10392 int64_t * a_ = (a); \
10393 __asm__ ("st1 {%1.d}[%2],[%0]" \
10395 : "r"(a_), "w"(b_), "i"(c) \
10396 : "memory"); \
10399 #define vst1_lane_u8(a, b, c) \
10400 __extension__ \
10401 ({ \
10402 uint8x8_t b_ = (b); \
10403 uint8_t * a_ = (a); \
10404 __asm__ ("st1 {%1.b}[%2],[%0]" \
10406 : "r"(a_), "w"(b_), "i"(c) \
10407 : "memory"); \
10410 #define vst1_lane_u16(a, b, c) \
10411 __extension__ \
10412 ({ \
10413 uint16x4_t b_ = (b); \
10414 uint16_t * a_ = (a); \
10415 __asm__ ("st1 {%1.h}[%2],[%0]" \
10417 : "r"(a_), "w"(b_), "i"(c) \
10418 : "memory"); \
10421 #define vst1_lane_u32(a, b, c) \
10422 __extension__ \
10423 ({ \
10424 uint32x2_t b_ = (b); \
10425 uint32_t * a_ = (a); \
10426 __asm__ ("st1 {%1.s}[%2],[%0]" \
10428 : "r"(a_), "w"(b_), "i"(c) \
10429 : "memory"); \
10432 #define vst1_lane_u64(a, b, c) \
10433 __extension__ \
10434 ({ \
10435 uint64x1_t b_ = (b); \
10436 uint64_t * a_ = (a); \
10437 __asm__ ("st1 {%1.d}[%2],[%0]" \
10439 : "r"(a_), "w"(b_), "i"(c) \
10440 : "memory"); \
/* vst1q_lane_* (128-bit register variants): same ST1 single-lane store
   as vst1_lane_* above, but `b' is a Q-register vector so the lane
   index range is doubled.  "memory" clobber prevents reordering past
   the store; `c' must be a compile-time constant.
   NOTE(review): the empty output-operand lines are not visible in this
   extract, as in the vst1_lane_* family.  */
10444 #define vst1q_lane_f32(a, b, c) \
10445 __extension__ \
10446 ({ \
10447 float32x4_t b_ = (b); \
10448 float32_t * a_ = (a); \
10449 __asm__ ("st1 {%1.s}[%2],[%0]" \
10451 : "r"(a_), "w"(b_), "i"(c) \
10452 : "memory"); \
10455 #define vst1q_lane_f64(a, b, c) \
10456 __extension__ \
10457 ({ \
10458 float64x2_t b_ = (b); \
10459 float64_t * a_ = (a); \
10460 __asm__ ("st1 {%1.d}[%2],[%0]" \
10462 : "r"(a_), "w"(b_), "i"(c) \
10463 : "memory"); \
10466 #define vst1q_lane_p8(a, b, c) \
10467 __extension__ \
10468 ({ \
10469 poly8x16_t b_ = (b); \
10470 poly8_t * a_ = (a); \
10471 __asm__ ("st1 {%1.b}[%2],[%0]" \
10473 : "r"(a_), "w"(b_), "i"(c) \
10474 : "memory"); \
10477 #define vst1q_lane_p16(a, b, c) \
10478 __extension__ \
10479 ({ \
10480 poly16x8_t b_ = (b); \
10481 poly16_t * a_ = (a); \
10482 __asm__ ("st1 {%1.h}[%2],[%0]" \
10484 : "r"(a_), "w"(b_), "i"(c) \
10485 : "memory"); \
10488 #define vst1q_lane_s8(a, b, c) \
10489 __extension__ \
10490 ({ \
10491 int8x16_t b_ = (b); \
10492 int8_t * a_ = (a); \
10493 __asm__ ("st1 {%1.b}[%2],[%0]" \
10495 : "r"(a_), "w"(b_), "i"(c) \
10496 : "memory"); \
10499 #define vst1q_lane_s16(a, b, c) \
10500 __extension__ \
10501 ({ \
10502 int16x8_t b_ = (b); \
10503 int16_t * a_ = (a); \
10504 __asm__ ("st1 {%1.h}[%2],[%0]" \
10506 : "r"(a_), "w"(b_), "i"(c) \
10507 : "memory"); \
10510 #define vst1q_lane_s32(a, b, c) \
10511 __extension__ \
10512 ({ \
10513 int32x4_t b_ = (b); \
10514 int32_t * a_ = (a); \
10515 __asm__ ("st1 {%1.s}[%2],[%0]" \
10517 : "r"(a_), "w"(b_), "i"(c) \
10518 : "memory"); \
10521 #define vst1q_lane_s64(a, b, c) \
10522 __extension__ \
10523 ({ \
10524 int64x2_t b_ = (b); \
10525 int64_t * a_ = (a); \
10526 __asm__ ("st1 {%1.d}[%2],[%0]" \
10528 : "r"(a_), "w"(b_), "i"(c) \
10529 : "memory"); \
10532 #define vst1q_lane_u8(a, b, c) \
10533 __extension__ \
10534 ({ \
10535 uint8x16_t b_ = (b); \
10536 uint8_t * a_ = (a); \
10537 __asm__ ("st1 {%1.b}[%2],[%0]" \
10539 : "r"(a_), "w"(b_), "i"(c) \
10540 : "memory"); \
10543 #define vst1q_lane_u16(a, b, c) \
10544 __extension__ \
10545 ({ \
10546 uint16x8_t b_ = (b); \
10547 uint16_t * a_ = (a); \
10548 __asm__ ("st1 {%1.h}[%2],[%0]" \
10550 : "r"(a_), "w"(b_), "i"(c) \
10551 : "memory"); \
10554 #define vst1q_lane_u32(a, b, c) \
10555 __extension__ \
10556 ({ \
10557 uint32x4_t b_ = (b); \
10558 uint32_t * a_ = (a); \
10559 __asm__ ("st1 {%1.s}[%2],[%0]" \
10561 : "r"(a_), "w"(b_), "i"(c) \
10562 : "memory"); \
10565 #define vst1q_lane_u64(a, b, c) \
10566 __extension__ \
10567 ({ \
10568 uint64x2_t b_ = (b); \
10569 uint64_t * a_ = (a); \
10570 __asm__ ("st1 {%1.d}[%2],[%0]" \
10572 : "r"(a_), "w"(b_), "i"(c) \
10573 : "memory"); \
/* vtst / vtstq (polynomial variants): lane-wise "test bits" via CMTST —
   each result lane is all-ones when (a & b) is non-zero in that lane,
   all-zeros otherwise.  Polynomial inputs, unsigned outputs.  */
10577 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10578 vtst_p8 (poly8x8_t a, poly8x8_t b)
10580 uint8x8_t result;
10581 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
10582 : "=w"(result)
10583 : "w"(a), "w"(b)
10584 : /* No clobbers */);
10585 return result;
10588 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10589 vtst_p16 (poly16x4_t a, poly16x4_t b)
10591 uint16x4_t result;
10592 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
10593 : "=w"(result)
10594 : "w"(a), "w"(b)
10595 : /* No clobbers */);
10596 return result;
10599 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10600 vtstq_p8 (poly8x16_t a, poly8x16_t b)
10602 uint8x16_t result;
10603 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
10604 : "=w"(result)
10605 : "w"(a), "w"(b)
10606 : /* No clobbers */);
10607 return result;
10610 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10611 vtstq_p16 (poly16x8_t a, poly16x8_t b)
10613 uint16x8_t result;
10614 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
10615 : "=w"(result)
10616 : "w"(a), "w"(b)
10617 : /* No clobbers */);
10618 return result;
10621 /* End of temporary inline asm implementations. */
10623 /* Start of temporary inline asm for vldn, vstn and friends. */
10625 /* Create struct element types for duplicating loads.
10627 Create 2 element structures of:
10629 +------+----+----+----+----+
10630 | | 8 | 16 | 32 | 64 |
10631 +------+----+----+----+----+
10632 |int | Y | Y | N | N |
10633 +------+----+----+----+----+
10634 |uint | Y | Y | N | N |
10635 +------+----+----+----+----+
10636 |float | - | - | N | N |
10637 +------+----+----+----+----+
10638 |poly | Y | Y | - | - |
10639 +------+----+----+----+----+
10641 Create 3 element structures of:
10643 +------+----+----+----+----+
10644 | | 8 | 16 | 32 | 64 |
10645 +------+----+----+----+----+
10646 |int | Y | Y | Y | Y |
10647 +------+----+----+----+----+
10648 |uint | Y | Y | Y | Y |
10649 +------+----+----+----+----+
10650 |float | - | - | Y | Y |
10651 +------+----+----+----+----+
10652 |poly | Y | Y | - | - |
10653 +------+----+----+----+----+
10655 Create 4 element structures of:
10657 +------+----+----+----+----+
10658 | | 8 | 16 | 32 | 64 |
10659 +------+----+----+----+----+
10660 |int | Y | N | N | Y |
10661 +------+----+----+----+----+
10662 |uint | Y | N | N | Y |
10663 +------+----+----+----+----+
10664 |float | - | - | N | Y |
10665 +------+----+----+----+----+
10666 |poly | Y | N | - | - |
10667 +------+----+----+----+----+
10669 This is required for casting memory reference. */
/* __STRUCTN(t, sz, nelem) pastes together a typedef such as
   int8x2_t { int8_t val[2]; } — scalar-element struct types used only
   so that vldN/vstN memory references can be cast; instantiated exactly
   per the Y entries of the tables above, then #undef'd.  */
10670 #define __STRUCTN(t, sz, nelem) \
10671 typedef struct t ## sz ## x ## nelem ## _t { \
10672 t ## sz ## _t val[nelem]; \
10673 } t ## sz ## x ## nelem ## _t;
10675 /* 2-element structs. */
10676 __STRUCTN (int, 8, 2)
10677 __STRUCTN (int, 16, 2)
10678 __STRUCTN (uint, 8, 2)
10679 __STRUCTN (uint, 16, 2)
10680 __STRUCTN (poly, 8, 2)
10681 __STRUCTN (poly, 16, 2)
10682 /* 3-element structs. */
10683 __STRUCTN (int, 8, 3)
10684 __STRUCTN (int, 16, 3)
10685 __STRUCTN (int, 32, 3)
10686 __STRUCTN (int, 64, 3)
10687 __STRUCTN (uint, 8, 3)
10688 __STRUCTN (uint, 16, 3)
10689 __STRUCTN (uint, 32, 3)
10690 __STRUCTN (uint, 64, 3)
10691 __STRUCTN (float, 32, 3)
10692 __STRUCTN (float, 64, 3)
10693 __STRUCTN (poly, 8, 3)
10694 __STRUCTN (poly, 16, 3)
10695 /* 4-element structs. */
10696 __STRUCTN (int, 8, 4)
10697 __STRUCTN (int, 64, 4)
10698 __STRUCTN (uint, 8, 4)
10699 __STRUCTN (uint, 64, 4)
10700 __STRUCTN (poly, 8, 4)
10701 __STRUCTN (float, 64, 4)
10702 #undef __STRUCTN
/* vst2[q]_lane generators.  The first __ST2_LANE_FUNC emits the 64-bit
   (D-register) forms: each half-register field of the 2-element struct
   is widened to a Q register (upper half zeroed via vcombine/vcreate),
   both are packed into a __builtin_aarch64_simd_oi, and lane __c is
   stored with the st2_lane builtin (which, unlike the asm macros above,
   range-checks the lane).  After #undef, the second definition emits the
   vst2q_lane forms, type-punning the struct straight to the oi mode
   through a union — no widening needed for Q-register inputs.  */
10705 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
10706 mode, ptr_mode, funcsuffix, signedtype) \
10707 __extension__ static __inline void \
10708 __attribute__ ((__always_inline__)) \
10709 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
10710 intype __b, const int __c) \
10712 __builtin_aarch64_simd_oi __o; \
10713 largetype __temp; \
10714 __temp.val[0] \
10715 = vcombine_##funcsuffix (__b.val[0], \
10716 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10717 __temp.val[1] \
10718 = vcombine_##funcsuffix (__b.val[1], \
10719 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10720 __o = __builtin_aarch64_set_qregoi##mode (__o, \
10721 (signedtype) __temp.val[0], 0); \
10722 __o = __builtin_aarch64_set_qregoi##mode (__o, \
10723 (signedtype) __temp.val[1], 1); \
10724 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10725 __ptr, __o, __c); \
10728 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
10729 float32x4_t)
10730 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
10731 float64x2_t)
10732 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
10733 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
10734 int16x8_t)
10735 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
10736 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
10737 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
10738 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
10739 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
10740 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
10741 int16x8_t)
10742 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
10743 int32x4_t)
10744 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
10745 int64x2_t)
10747 #undef __ST2_LANE_FUNC
10748 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10749 __extension__ static __inline void \
10750 __attribute__ ((__always_inline__)) \
10751 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
10752 intype __b, const int __c) \
10754 union { intype __i; \
10755 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
10756 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10757 __ptr, __temp.__o, __c); \
10760 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
10761 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
10762 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
10763 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
10764 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
10765 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
10766 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
10767 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
10768 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
10769 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
10770 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
10771 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
/* vst3[q]_lane generators — identical scheme to __ST2_LANE_FUNC with a
   third struct field, a __builtin_aarch64_simd_ci register tuple, and
   the st3_lane builtin.  First definition: D-register forms (widen each
   field to Q with a zero upper half); after #undef, the Q-register
   vst3q_lane forms via union type-punning.  */
10773 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
10774 mode, ptr_mode, funcsuffix, signedtype) \
10775 __extension__ static __inline void \
10776 __attribute__ ((__always_inline__)) \
10777 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
10778 intype __b, const int __c) \
10780 __builtin_aarch64_simd_ci __o; \
10781 largetype __temp; \
10782 __temp.val[0] \
10783 = vcombine_##funcsuffix (__b.val[0], \
10784 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10785 __temp.val[1] \
10786 = vcombine_##funcsuffix (__b.val[1], \
10787 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10788 __temp.val[2] \
10789 = vcombine_##funcsuffix (__b.val[2], \
10790 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10791 __o = __builtin_aarch64_set_qregci##mode (__o, \
10792 (signedtype) __temp.val[0], 0); \
10793 __o = __builtin_aarch64_set_qregci##mode (__o, \
10794 (signedtype) __temp.val[1], 1); \
10795 __o = __builtin_aarch64_set_qregci##mode (__o, \
10796 (signedtype) __temp.val[2], 2); \
10797 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10798 __ptr, __o, __c); \
10801 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
10802 float32x4_t)
10803 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
10804 float64x2_t)
10805 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
10806 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
10807 int16x8_t)
10808 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
10809 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
10810 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
10811 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
10812 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
10813 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
10814 int16x8_t)
10815 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
10816 int32x4_t)
10817 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
10818 int64x2_t)
10820 #undef __ST3_LANE_FUNC
10821 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10822 __extension__ static __inline void \
10823 __attribute__ ((__always_inline__)) \
10824 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
10825 intype __b, const int __c) \
10827 union { intype __i; \
10828 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
10829 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10830 __ptr, __temp.__o, __c); \
10833 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
10834 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
10835 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
10836 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
10837 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
10838 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
10839 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
10840 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
10841 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
10842 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
10843 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
10844 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
/* vst4[q]_lane generators — same pattern extended to four fields, a
   __builtin_aarch64_simd_xi register tuple, and the st4_lane builtin.
   First definition: D-register forms; after #undef, Q-register
   vst4q_lane forms via union type-punning.  */
10846 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
10847 mode, ptr_mode, funcsuffix, signedtype) \
10848 __extension__ static __inline void \
10849 __attribute__ ((__always_inline__)) \
10850 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
10851 intype __b, const int __c) \
10853 __builtin_aarch64_simd_xi __o; \
10854 largetype __temp; \
10855 __temp.val[0] \
10856 = vcombine_##funcsuffix (__b.val[0], \
10857 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10858 __temp.val[1] \
10859 = vcombine_##funcsuffix (__b.val[1], \
10860 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10861 __temp.val[2] \
10862 = vcombine_##funcsuffix (__b.val[2], \
10863 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10864 __temp.val[3] \
10865 = vcombine_##funcsuffix (__b.val[3], \
10866 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10867 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10868 (signedtype) __temp.val[0], 0); \
10869 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10870 (signedtype) __temp.val[1], 1); \
10871 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10872 (signedtype) __temp.val[2], 2); \
10873 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10874 (signedtype) __temp.val[3], 3); \
10875 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10876 __ptr, __o, __c); \
10879 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
10880 float32x4_t)
10881 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
10882 float64x2_t)
10883 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
10884 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
10885 int16x8_t)
10886 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
10887 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
10888 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
10889 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
10890 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
10891 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
10892 int16x8_t)
10893 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
10894 int32x4_t)
10895 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
10896 int64x2_t)
10898 #undef __ST4_LANE_FUNC
10899 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10900 __extension__ static __inline void \
10901 __attribute__ ((__always_inline__)) \
10902 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
10903 intype __b, const int __c) \
10905 union { intype __i; \
10906 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
10907 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10908 __ptr, __temp.__o, __c); \
10911 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
10912 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
10913 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
10914 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
10915 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
10916 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
10917 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
10918 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
10919 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
10920 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
10921 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
10922 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
/* vaddlv_s32 / vaddlv_u32: widening add-across-vector of a two-lane
   32-bit vector, implemented as a pairwise long add (SADDLP/UADDLP
   producing a single 64-bit lane).  */
10924 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10925 vaddlv_s32 (int32x2_t a)
10927 int64_t result;
10928 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10929 return result;
10932 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10933 vaddlv_u32 (uint32x2_t a)
10935 uint64_t result;
10936 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10937 return result;
/* Saturating doubling multiply-high by a lane of a 128-bit vector:
   vqdmulh_laneq_* (truncating) and vqrdmulh_laneq_* (rounding).  __c is
   the lane index into the second operand.  Each simply forwards to the
   corresponding GCC builtin.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
}
/* Table intrinsics.  */

/* vqtbl1{,q}_{p8,s8,u8}: single-register table lookup (TBL) using the
   16 bytes of `a' as the table and each byte of `b' as an index.  Per the
   TBL instruction semantics, out-of-range indices produce 0 in the
   corresponding result byte.  Narrow forms return 8 bytes, "q" forms 16.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
{
  poly8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl1_s8 (int8x16_t a, uint8x8_t b)
{
  int8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
{
  poly8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
{
  int8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
	   : "=w"(result)
	   : "w"(a), "w"(b)
	   : /* No clobbers */);
  return result;
}
/* vqtbl2{,q}_{s8,u8,p8}: two-register table lookup.  The tuple `tab' is
   loaded from memory into the fixed register pair v16/v17 (TBL requires
   consecutive registers), then TBL indexes across the 32 table bytes with
   `idx'.  The asm therefore clobbers v16/v17 and references memory.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
/* vqtbl3{,q}_{s8,u8,p8}: three-register table lookup; as vqtbl2 but the
   48 table bytes are loaded into v16-v18.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
/* vqtbl4{,q}_{s8,u8,p8}: four-register table lookup; the 64 table bytes
   are loaded into v16-v19.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"=w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
/* vqtbx1{,q}_{s8,u8,p8}: single-register table lookup extension (TBX).
   Unlike TBL, bytes whose index is out of range keep their previous value,
   so `result' is seeded with `r' and tied read-write ("+w").  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
	   : "+w"(result)
	   : "w"(tab), "w"(idx)
	   : /* No clobbers */);
  return result;
}
/* vqtbx2{,q}_{s8,u8,p8}: two-register TBX; the table tuple is loaded into
   the fixed pair v16/v17 and `result' is tied read-write so unselected
   bytes keep the value of `r'.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17");
  return result;
}
/* vqtbx3{,q}_{s8,u8,p8}: three-register TBX via v16-v18.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18");
  return result;
}
/* vqtbx4{,q}_{s8,u8,p8}: four-register TBX via v16-v19.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
	   "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
	   :"+w"(result)
	   :"Q"(tab),"w"(idx)
	   :"memory", "v16", "v17", "v18", "v19");
  return result;
}
/* V7 legacy table intrinsics.  */

/* vtbl1_* takes an 8-byte table; AArch64 TBL wants a 16-byte source
   register, so the table is widened with a zero upper half (any index >= 8
   then selects a zero byte, matching the old ARMv7 out-of-range rule of
   returning 0 for vtbl).  vtbl2_* concatenates the two 8-byte halves into
   one 16-byte register instead.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
	   : "=w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}
/* vtbl3_* / vtbl4_*: the three or four 8-byte table halves are packed into
   a two-Q-register tuple (vtbl3 zero-fills the last half), which is loaded
   into the fixed pair v16/v17 for a two-register TBL.  NOTE(review): the
   asm strings below ("v16.16b - v17.16b }") are kept byte-for-byte,
   including the stray space before '}' — purely cosmetic in the assembly
   text.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "=w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}
/* vtbx2_* and vtbx4_*: legacy table-extend forms.  The table halves are
   concatenated into full Q registers and TBX is used so that bytes with
   out-of-range indices keep the value of `r'.  (vtbx1/vtbx3 are not here:
   their 8/24-byte tables need extra range handling and are defined
   elsewhere.)  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
	   : "+w"(result)
	   : "w"(temp), "w"(idx)
	   : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
	   "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
	   : "+w"(result)
	   : "Q"(temp), "w"(idx)
	   : "v16", "v17", "memory");
  return result;
}
/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */

/* vabs: element-wise absolute value.  The one-element 64-bit forms index
   lane 0, apply the scalar operation, and rebuild the vector with a
   compound literal.  NOTE(review): for the signed integer forms the result
   for the minimum representable value follows the underlying builtin's
   semantics — not visible from this header.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_fabs (__a[0])};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}
/* vadd */

/* vaddd_{s64,u64}: scalar 64-bit addition.  NOTE(review): the signed form
   uses plain C addition, for which overflow is undefined behavior in C —
   confirm the intended wraparound semantics rely on GCC's behavior here.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddd_s64 (int64_t __a, int64_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __a + __b;
}
11898 /* vaddv */
11900 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
11901 vaddv_s8 (int8x8_t __a)
11903 return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
11906 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11907 vaddv_s16 (int16x4_t __a)
11909 return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
11912 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11913 vaddv_s32 (int32x2_t __a)
11915 return __builtin_aarch64_reduc_plus_scal_v2si (__a);
11918 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11919 vaddv_u8 (uint8x8_t __a)
11921 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
11924 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11925 vaddv_u16 (uint16x4_t __a)
11927 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
11930 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11931 vaddv_u32 (uint32x2_t __a)
11933 return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
11936 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
11937 vaddvq_s8 (int8x16_t __a)
11939 return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
11942 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11943 vaddvq_s16 (int16x8_t __a)
11945 return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
11948 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11949 vaddvq_s32 (int32x4_t __a)
11951 return __builtin_aarch64_reduc_plus_scal_v4si (__a);
11954 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
11955 vaddvq_s64 (int64x2_t __a)
11957 return __builtin_aarch64_reduc_plus_scal_v2di (__a);
11960 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11961 vaddvq_u8 (uint8x16_t __a)
11963 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
11966 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11967 vaddvq_u16 (uint16x8_t __a)
11969 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
11972 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11973 vaddvq_u32 (uint32x4_t __a)
11975 return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
11978 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11979 vaddvq_u64 (uint64x2_t __a)
11981 return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
11984 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11985 vaddv_f32 (float32x2_t __a)
11987 return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
11990 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11991 vaddvq_f32 (float32x4_t __a)
11993 return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
11996 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11997 vaddvq_f64 (float64x2_t __a)
11999 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
12002 /* vbsl */
12004 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12005 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
12007 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
12010 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12011 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
12013 return (float64x1_t)
12014 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
12017 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12018 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
12020 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
12023 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12024 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
12026 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
12029 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12030 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
12032 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
12035 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12036 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
12038 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
12041 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12042 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
12044 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
12047 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12048 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
12050 return (int64x1_t)
12051 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
12054 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12055 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
12057 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
12060 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12061 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
12063 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
12066 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12067 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
12069 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
12072 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12073 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
12075 return (uint64x1_t)
12076 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
12079 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12080 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
12082 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
12085 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12086 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
12088 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
12091 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12092 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
12094 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
12097 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12098 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
12100 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
12103 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12104 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
12106 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
12109 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12110 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
12112 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
12115 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12116 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
12118 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
12121 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12122 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
12124 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
12127 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12128 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
12130 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
12133 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12134 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
12136 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
12139 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12140 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
12142 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
12145 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12146 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
12148 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes  */

/* AES single-round encryption: wraps the AESE crypto builtin.
   Parameter names are in the reserved (__-prefixed) namespace so a user
   macro named `data' or `key' cannot break this system header, and the
   functions carry __extension__/__always_inline__ for consistency with
   every other intrinsic in this file.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (__data, __key);
}

/* AES single-round decryption: wraps the AESD crypto builtin.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesdq_u8 (uint8x16_t __data, uint8x16_t __key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (__data, __key);
}

/* AES MixColumns: wraps the AESMC crypto builtin.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesmcq_u8 (uint8x16_t __data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (__data);
}

/* AES inverse MixColumns: wraps the AESIMC crypto builtin.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaesimcq_u8 (uint8x16_t __data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (__data);
}

#endif
12181 /* vcage */
12183 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12184 vcage_f64 (float64x1_t __a, float64x1_t __b)
12186 return vabs_f64 (__a) >= vabs_f64 (__b);
12189 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12190 vcages_f32 (float32_t __a, float32_t __b)
12192 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
12195 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12196 vcage_f32 (float32x2_t __a, float32x2_t __b)
12198 return vabs_f32 (__a) >= vabs_f32 (__b);
12201 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12202 vcageq_f32 (float32x4_t __a, float32x4_t __b)
12204 return vabsq_f32 (__a) >= vabsq_f32 (__b);
12207 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12208 vcaged_f64 (float64_t __a, float64_t __b)
12210 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
12213 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12214 vcageq_f64 (float64x2_t __a, float64x2_t __b)
12216 return vabsq_f64 (__a) >= vabsq_f64 (__b);
12219 /* vcagt */
12221 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12222 vcagts_f32 (float32_t __a, float32_t __b)
12224 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
12227 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12228 vcagt_f32 (float32x2_t __a, float32x2_t __b)
12230 return vabs_f32 (__a) > vabs_f32 (__b);
12233 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12234 vcagt_f64 (float64x1_t __a, float64x1_t __b)
12236 return vabs_f64 (__a) > vabs_f64 (__b);
12239 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12240 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
12242 return vabsq_f32 (__a) > vabsq_f32 (__b);
12245 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12246 vcagtd_f64 (float64_t __a, float64_t __b)
12248 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
12251 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12252 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
12254 return vabsq_f64 (__a) > vabsq_f64 (__b);
12257 /* vcale */
12259 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12260 vcale_f32 (float32x2_t __a, float32x2_t __b)
12262 return vabs_f32 (__a) <= vabs_f32 (__b);
12265 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12266 vcale_f64 (float64x1_t __a, float64x1_t __b)
12268 return vabs_f64 (__a) <= vabs_f64 (__b);
12271 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12272 vcaled_f64 (float64_t __a, float64_t __b)
12274 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
12277 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12278 vcales_f32 (float32_t __a, float32_t __b)
12280 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
12283 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12284 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
12286 return vabsq_f32 (__a) <= vabsq_f32 (__b);
12289 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12290 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
12292 return vabsq_f64 (__a) <= vabsq_f64 (__b);
12295 /* vcalt */
12297 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12298 vcalt_f32 (float32x2_t __a, float32x2_t __b)
12300 return vabs_f32 (__a) < vabs_f32 (__b);
12303 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12304 vcalt_f64 (float64x1_t __a, float64x1_t __b)
12306 return vabs_f64 (__a) < vabs_f64 (__b);
12309 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12310 vcaltd_f64 (float64_t __a, float64_t __b)
12312 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
12315 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12316 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
12318 return vabsq_f32 (__a) < vabsq_f32 (__b);
12321 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12322 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
12324 return vabsq_f64 (__a) < vabsq_f64 (__b);
12327 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12328 vcalts_f32 (float32_t __a, float32_t __b)
12330 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
12333 /* vceq - vector. */
12335 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12336 vceq_f32 (float32x2_t __a, float32x2_t __b)
12338 return (uint32x2_t) (__a == __b);
12341 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12342 vceq_f64 (float64x1_t __a, float64x1_t __b)
12344 return (uint64x1_t) (__a == __b);
12347 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12348 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
12350 return (uint8x8_t) (__a == __b);
12353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12354 vceq_s8 (int8x8_t __a, int8x8_t __b)
12356 return (uint8x8_t) (__a == __b);
12359 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12360 vceq_s16 (int16x4_t __a, int16x4_t __b)
12362 return (uint16x4_t) (__a == __b);
12365 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12366 vceq_s32 (int32x2_t __a, int32x2_t __b)
12368 return (uint32x2_t) (__a == __b);
12371 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12372 vceq_s64 (int64x1_t __a, int64x1_t __b)
12374 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
12377 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12378 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
12380 return (__a == __b);
12383 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12384 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
12386 return (__a == __b);
12389 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12390 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
12392 return (__a == __b);
12395 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12396 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
12398 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
12401 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12402 vceqq_f32 (float32x4_t __a, float32x4_t __b)
12404 return (uint32x4_t) (__a == __b);
12407 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12408 vceqq_f64 (float64x2_t __a, float64x2_t __b)
12410 return (uint64x2_t) (__a == __b);
12413 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12414 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
12416 return (uint8x16_t) (__a == __b);
12419 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12420 vceqq_s8 (int8x16_t __a, int8x16_t __b)
12422 return (uint8x16_t) (__a == __b);
12425 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12426 vceqq_s16 (int16x8_t __a, int16x8_t __b)
12428 return (uint16x8_t) (__a == __b);
12431 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12432 vceqq_s32 (int32x4_t __a, int32x4_t __b)
12434 return (uint32x4_t) (__a == __b);
12437 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12438 vceqq_s64 (int64x2_t __a, int64x2_t __b)
12440 return (uint64x2_t) (__a == __b);
12443 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12444 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
12446 return (__a == __b);
12449 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12450 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
12452 return (__a == __b);
12455 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12456 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
12458 return (__a == __b);
12461 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12462 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
12464 return (__a == __b);
12467 /* vceq - scalar. */
12469 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12470 vceqs_f32 (float32_t __a, float32_t __b)
12472 return __a == __b ? -1 : 0;
12475 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12476 vceqd_s64 (int64_t __a, int64_t __b)
12478 return __a == __b ? -1ll : 0ll;
12481 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12482 vceqd_u64 (uint64_t __a, uint64_t __b)
12484 return __a == __b ? -1ll : 0ll;
12487 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12488 vceqd_f64 (float64_t __a, float64_t __b)
12490 return __a == __b ? -1ll : 0ll;
12493 /* vceqz - vector. */
12495 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12496 vceqz_f32 (float32x2_t __a)
12498 return (uint32x2_t) (__a == 0.0f);
12501 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12502 vceqz_f64 (float64x1_t __a)
12504 return (uint64x1_t) (__a == (float64x1_t) {0.0});
12507 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12508 vceqz_p8 (poly8x8_t __a)
12510 return (uint8x8_t) (__a == 0);
12513 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12514 vceqz_s8 (int8x8_t __a)
12516 return (uint8x8_t) (__a == 0);
12519 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12520 vceqz_s16 (int16x4_t __a)
12522 return (uint16x4_t) (__a == 0);
12525 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12526 vceqz_s32 (int32x2_t __a)
12528 return (uint32x2_t) (__a == 0);
12531 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12532 vceqz_s64 (int64x1_t __a)
12534 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
12537 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12538 vceqz_u8 (uint8x8_t __a)
12540 return (__a == 0);
12543 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12544 vceqz_u16 (uint16x4_t __a)
12546 return (__a == 0);
12549 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12550 vceqz_u32 (uint32x2_t __a)
12552 return (__a == 0);
12555 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12556 vceqz_u64 (uint64x1_t __a)
12558 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
12561 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12562 vceqzq_f32 (float32x4_t __a)
12564 return (uint32x4_t) (__a == 0.0f);
12567 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12568 vceqzq_f64 (float64x2_t __a)
12570 return (uint64x2_t) (__a == 0.0f);
12573 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12574 vceqzq_p8 (poly8x16_t __a)
12576 return (uint8x16_t) (__a == 0);
12579 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12580 vceqzq_s8 (int8x16_t __a)
12582 return (uint8x16_t) (__a == 0);
12585 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12586 vceqzq_s16 (int16x8_t __a)
12588 return (uint16x8_t) (__a == 0);
12591 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12592 vceqzq_s32 (int32x4_t __a)
12594 return (uint32x4_t) (__a == 0);
12597 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12598 vceqzq_s64 (int64x2_t __a)
12600 return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
12603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12604 vceqzq_u8 (uint8x16_t __a)
12606 return (__a == 0);
12609 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12610 vceqzq_u16 (uint16x8_t __a)
12612 return (__a == 0);
12615 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12616 vceqzq_u32 (uint32x4_t __a)
12618 return (__a == 0);
12621 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12622 vceqzq_u64 (uint64x2_t __a)
12624 return (__a == __AARCH64_UINT64_C (0));
12627 /* vceqz - scalar. */
12629 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12630 vceqzs_f32 (float32_t __a)
12632 return __a == 0.0f ? -1 : 0;
12635 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12636 vceqzd_s64 (int64_t __a)
12638 return __a == 0 ? -1ll : 0ll;
12641 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12642 vceqzd_u64 (uint64_t __a)
12644 return __a == 0 ? -1ll : 0ll;
12647 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12648 vceqzd_f64 (float64_t __a)
12650 return __a == 0.0 ? -1ll : 0ll;
12653 /* vcge - vector. */
12655 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12656 vcge_f32 (float32x2_t __a, float32x2_t __b)
12658 return (uint32x2_t) (__a >= __b);
12661 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12662 vcge_f64 (float64x1_t __a, float64x1_t __b)
12664 return (uint64x1_t) (__a >= __b);
12667 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12668 vcge_s8 (int8x8_t __a, int8x8_t __b)
12670 return (uint8x8_t) (__a >= __b);
12673 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12674 vcge_s16 (int16x4_t __a, int16x4_t __b)
12676 return (uint16x4_t) (__a >= __b);
12679 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12680 vcge_s32 (int32x2_t __a, int32x2_t __b)
12682 return (uint32x2_t) (__a >= __b);
12685 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12686 vcge_s64 (int64x1_t __a, int64x1_t __b)
12688 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
12691 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12692 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
12694 return (__a >= __b);
12697 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12698 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
12700 return (__a >= __b);
12703 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12704 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
12706 return (__a >= __b);
12709 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12710 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
12712 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
12715 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12716 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
12718 return (uint32x4_t) (__a >= __b);
12721 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12722 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
12724 return (uint64x2_t) (__a >= __b);
12727 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12728 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
12730 return (uint8x16_t) (__a >= __b);
12733 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12734 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
12736 return (uint16x8_t) (__a >= __b);
12739 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12740 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
12742 return (uint32x4_t) (__a >= __b);
12745 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12746 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
12748 return (uint64x2_t) (__a >= __b);
12751 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12752 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
12754 return (__a >= __b);
12757 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12758 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
12760 return (__a >= __b);
12763 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12764 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
12766 return (__a >= __b);
12769 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12770 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
12772 return (__a >= __b);
12775 /* vcge - scalar. */
12777 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12778 vcges_f32 (float32_t __a, float32_t __b)
12780 return __a >= __b ? -1 : 0;
12783 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12784 vcged_s64 (int64_t __a, int64_t __b)
12786 return __a >= __b ? -1ll : 0ll;
12789 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12790 vcged_u64 (uint64_t __a, uint64_t __b)
12792 return __a >= __b ? -1ll : 0ll;
12795 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12796 vcged_f64 (float64_t __a, float64_t __b)
12798 return __a >= __b ? -1ll : 0ll;
12801 /* vcgez - vector. */
12803 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12804 vcgez_f32 (float32x2_t __a)
12806 return (uint32x2_t) (__a >= 0.0f);
12809 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12810 vcgez_f64 (float64x1_t __a)
12812 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
12815 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12816 vcgez_s8 (int8x8_t __a)
12818 return (uint8x8_t) (__a >= 0);
12821 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12822 vcgez_s16 (int16x4_t __a)
12824 return (uint16x4_t) (__a >= 0);
12827 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12828 vcgez_s32 (int32x2_t __a)
12830 return (uint32x2_t) (__a >= 0);
12833 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12834 vcgez_s64 (int64x1_t __a)
12836 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
12839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12840 vcgezq_f32 (float32x4_t __a)
12842 return (uint32x4_t) (__a >= 0.0f);
12845 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12846 vcgezq_f64 (float64x2_t __a)
12848 return (uint64x2_t) (__a >= 0.0);
12851 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12852 vcgezq_s8 (int8x16_t __a)
12854 return (uint8x16_t) (__a >= 0);
12857 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12858 vcgezq_s16 (int16x8_t __a)
12860 return (uint16x8_t) (__a >= 0);
12863 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12864 vcgezq_s32 (int32x4_t __a)
12866 return (uint32x4_t) (__a >= 0);
12869 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12870 vcgezq_s64 (int64x2_t __a)
12872 return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
12875 /* vcgez - scalar. */
12877 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12878 vcgezs_f32 (float32_t __a)
12880 return __a >= 0.0f ? -1 : 0;
12883 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12884 vcgezd_s64 (int64_t __a)
12886 return __a >= 0 ? -1ll : 0ll;
12889 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12890 vcgezd_f64 (float64_t __a)
12892 return __a >= 0.0 ? -1ll : 0ll;
12895 /* vcgt - vector. */
12897 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12898 vcgt_f32 (float32x2_t __a, float32x2_t __b)
12900 return (uint32x2_t) (__a > __b);
12903 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12904 vcgt_f64 (float64x1_t __a, float64x1_t __b)
12906 return (uint64x1_t) (__a > __b);
12909 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12910 vcgt_s8 (int8x8_t __a, int8x8_t __b)
12912 return (uint8x8_t) (__a > __b);
12915 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12916 vcgt_s16 (int16x4_t __a, int16x4_t __b)
12918 return (uint16x4_t) (__a > __b);
12921 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12922 vcgt_s32 (int32x2_t __a, int32x2_t __b)
12924 return (uint32x2_t) (__a > __b);
12927 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12928 vcgt_s64 (int64x1_t __a, int64x1_t __b)
12930 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
12933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12934 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
12936 return (__a > __b);
12939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12940 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
12942 return (__a > __b);
12945 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12946 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
12948 return (__a > __b);
12951 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12952 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
12954 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
12957 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12958 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
12960 return (uint32x4_t) (__a > __b);
12963 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12964 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
12966 return (uint64x2_t) (__a > __b);
12969 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12970 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
12972 return (uint8x16_t) (__a > __b);
12975 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12976 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
12978 return (uint16x8_t) (__a > __b);
12981 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12982 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
12984 return (uint32x4_t) (__a > __b);
12987 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12988 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
12990 return (uint64x2_t) (__a > __b);
12993 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12994 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
12996 return (__a > __b);
12999 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13000 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
13002 return (__a > __b);
13005 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13006 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
13008 return (__a > __b);
13011 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13012 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
13014 return (__a > __b);
13017 /* vcgt - scalar. */
13019 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13020 vcgts_f32 (float32_t __a, float32_t __b)
13022 return __a > __b ? -1 : 0;
13025 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13026 vcgtd_s64 (int64_t __a, int64_t __b)
13028 return __a > __b ? -1ll : 0ll;
13031 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13032 vcgtd_u64 (uint64_t __a, uint64_t __b)
13034 return __a > __b ? -1ll : 0ll;
13037 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13038 vcgtd_f64 (float64_t __a, float64_t __b)
13040 return __a > __b ? -1ll : 0ll;
13043 /* vcgtz - vector. */
13045 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13046 vcgtz_f32 (float32x2_t __a)
13048 return (uint32x2_t) (__a > 0.0f);
13051 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13052 vcgtz_f64 (float64x1_t __a)
13054 return (uint64x1_t) (__a > (float64x1_t) {0.0});
13057 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13058 vcgtz_s8 (int8x8_t __a)
13060 return (uint8x8_t) (__a > 0);
13063 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13064 vcgtz_s16 (int16x4_t __a)
13066 return (uint16x4_t) (__a > 0);
13069 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13070 vcgtz_s32 (int32x2_t __a)
13072 return (uint32x2_t) (__a > 0);
13075 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13076 vcgtz_s64 (int64x1_t __a)
13078 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
13081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13082 vcgtzq_f32 (float32x4_t __a)
13084 return (uint32x4_t) (__a > 0.0f);
13087 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13088 vcgtzq_f64 (float64x2_t __a)
13090 return (uint64x2_t) (__a > 0.0);
13093 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13094 vcgtzq_s8 (int8x16_t __a)
13096 return (uint8x16_t) (__a > 0);
13099 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13100 vcgtzq_s16 (int16x8_t __a)
13102 return (uint16x8_t) (__a > 0);
13105 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13106 vcgtzq_s32 (int32x4_t __a)
13108 return (uint32x4_t) (__a > 0);
13111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13112 vcgtzq_s64 (int64x2_t __a)
13114 return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
13117 /* vcgtz - scalar. */
13119 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13120 vcgtzs_f32 (float32_t __a)
13122 return __a > 0.0f ? -1 : 0;
13125 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13126 vcgtzd_s64 (int64_t __a)
13128 return __a > 0 ? -1ll : 0ll;
13131 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13132 vcgtzd_f64 (float64_t __a)
13134 return __a > 0.0 ? -1ll : 0ll;
13137 /* vcle - vector. */
13139 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13140 vcle_f32 (float32x2_t __a, float32x2_t __b)
13142 return (uint32x2_t) (__a <= __b);
13145 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13146 vcle_f64 (float64x1_t __a, float64x1_t __b)
13148 return (uint64x1_t) (__a <= __b);
13151 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13152 vcle_s8 (int8x8_t __a, int8x8_t __b)
13154 return (uint8x8_t) (__a <= __b);
13157 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13158 vcle_s16 (int16x4_t __a, int16x4_t __b)
13160 return (uint16x4_t) (__a <= __b);
13163 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13164 vcle_s32 (int32x2_t __a, int32x2_t __b)
13166 return (uint32x2_t) (__a <= __b);
13169 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13170 vcle_s64 (int64x1_t __a, int64x1_t __b)
13172 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
13175 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13176 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
13178 return (__a <= __b);
13181 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13182 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
13184 return (__a <= __b);
13187 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13188 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
13190 return (__a <= __b);
13193 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13194 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
13196 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
13199 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13200 vcleq_f32 (float32x4_t __a, float32x4_t __b)
13202 return (uint32x4_t) (__a <= __b);
13205 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13206 vcleq_f64 (float64x2_t __a, float64x2_t __b)
13208 return (uint64x2_t) (__a <= __b);
13211 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13212 vcleq_s8 (int8x16_t __a, int8x16_t __b)
13214 return (uint8x16_t) (__a <= __b);
13217 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13218 vcleq_s16 (int16x8_t __a, int16x8_t __b)
13220 return (uint16x8_t) (__a <= __b);
13223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13224 vcleq_s32 (int32x4_t __a, int32x4_t __b)
13226 return (uint32x4_t) (__a <= __b);
13229 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13230 vcleq_s64 (int64x2_t __a, int64x2_t __b)
13232 return (uint64x2_t) (__a <= __b);
13235 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13236 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
13238 return (__a <= __b);
13241 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13242 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
13244 return (__a <= __b);
13247 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13248 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
13250 return (__a <= __b);
13253 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13254 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
13256 return (__a <= __b);
13259 /* vcle - scalar. */
13261 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13262 vcles_f32 (float32_t __a, float32_t __b)
13264 return __a <= __b ? -1 : 0;
/* Scalar signed compare less-than-or-equal: all-ones 64-bit mask when
   __a <= __b, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
/* Scalar unsigned compare less-than-or-equal: all-ones 64-bit mask
   when __a <= __b, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
13279 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13280 vcled_f64 (float64_t __a, float64_t __b)
13282 return __a <= __b ? -1ll : 0ll;
13285 /* vclez - vector. */
13287 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13288 vclez_f32 (float32x2_t __a)
13290 return (uint32x2_t) (__a <= 0.0f);
13293 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13294 vclez_f64 (float64x1_t __a)
13296 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
13299 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13300 vclez_s8 (int8x8_t __a)
13302 return (uint8x8_t) (__a <= 0);
13305 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13306 vclez_s16 (int16x4_t __a)
13308 return (uint16x4_t) (__a <= 0);
13311 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13312 vclez_s32 (int32x2_t __a)
13314 return (uint32x2_t) (__a <= 0);
13317 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13318 vclez_s64 (int64x1_t __a)
13320 return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
13323 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13324 vclezq_f32 (float32x4_t __a)
13326 return (uint32x4_t) (__a <= 0.0f);
13329 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13330 vclezq_f64 (float64x2_t __a)
13332 return (uint64x2_t) (__a <= 0.0);
13335 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13336 vclezq_s8 (int8x16_t __a)
13338 return (uint8x16_t) (__a <= 0);
13341 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13342 vclezq_s16 (int16x8_t __a)
13344 return (uint16x8_t) (__a <= 0);
13347 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13348 vclezq_s32 (int32x4_t __a)
13350 return (uint32x4_t) (__a <= 0);
13353 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13354 vclezq_s64 (int64x2_t __a)
13356 return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
13359 /* vclez - scalar. */
13361 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13362 vclezs_f32 (float32_t __a)
13364 return __a <= 0.0f ? -1 : 0;
/* Scalar signed compare less-than-or-equal-to-zero: all-ones 64-bit
   mask when __a <= 0, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return __a <= 0 ? -1ll : 0ll;
}
13373 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13374 vclezd_f64 (float64_t __a)
13376 return __a <= 0.0 ? -1ll : 0ll;
13379 /* vclt - vector. */
13381 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13382 vclt_f32 (float32x2_t __a, float32x2_t __b)
13384 return (uint32x2_t) (__a < __b);
13387 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13388 vclt_f64 (float64x1_t __a, float64x1_t __b)
13390 return (uint64x1_t) (__a < __b);
13393 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13394 vclt_s8 (int8x8_t __a, int8x8_t __b)
13396 return (uint8x8_t) (__a < __b);
13399 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13400 vclt_s16 (int16x4_t __a, int16x4_t __b)
13402 return (uint16x4_t) (__a < __b);
13405 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13406 vclt_s32 (int32x2_t __a, int32x2_t __b)
13408 return (uint32x2_t) (__a < __b);
13411 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13412 vclt_s64 (int64x1_t __a, int64x1_t __b)
13414 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
13417 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13418 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
13420 return (__a < __b);
13423 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13424 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
13426 return (__a < __b);
13429 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13430 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
13432 return (__a < __b);
13435 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13436 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
13438 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
13441 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13442 vcltq_f32 (float32x4_t __a, float32x4_t __b)
13444 return (uint32x4_t) (__a < __b);
13447 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13448 vcltq_f64 (float64x2_t __a, float64x2_t __b)
13450 return (uint64x2_t) (__a < __b);
13453 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13454 vcltq_s8 (int8x16_t __a, int8x16_t __b)
13456 return (uint8x16_t) (__a < __b);
13459 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13460 vcltq_s16 (int16x8_t __a, int16x8_t __b)
13462 return (uint16x8_t) (__a < __b);
13465 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13466 vcltq_s32 (int32x4_t __a, int32x4_t __b)
13468 return (uint32x4_t) (__a < __b);
13471 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13472 vcltq_s64 (int64x2_t __a, int64x2_t __b)
13474 return (uint64x2_t) (__a < __b);
13477 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13478 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
13480 return (__a < __b);
13483 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13484 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
13486 return (__a < __b);
13489 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13490 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
13492 return (__a < __b);
13495 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13496 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
13498 return (__a < __b);
13501 /* vclt - scalar. */
13503 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13504 vclts_f32 (float32_t __a, float32_t __b)
13506 return __a < __b ? -1 : 0;
/* Scalar signed compare less-than: all-ones 64-bit mask when
   __a < __b, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
/* Scalar unsigned compare less-than: all-ones 64-bit mask when
   __a < __b, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
13521 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13522 vcltd_f64 (float64_t __a, float64_t __b)
13524 return __a < __b ? -1ll : 0ll;
13527 /* vcltz - vector. */
13529 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13530 vcltz_f32 (float32x2_t __a)
13532 return (uint32x2_t) (__a < 0.0f);
13535 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13536 vcltz_f64 (float64x1_t __a)
13538 return (uint64x1_t) (__a < (float64x1_t) {0.0});
13541 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13542 vcltz_s8 (int8x8_t __a)
13544 return (uint8x8_t) (__a < 0);
13547 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13548 vcltz_s16 (int16x4_t __a)
13550 return (uint16x4_t) (__a < 0);
13553 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13554 vcltz_s32 (int32x2_t __a)
13556 return (uint32x2_t) (__a < 0);
13559 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13560 vcltz_s64 (int64x1_t __a)
13562 return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
13565 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13566 vcltzq_f32 (float32x4_t __a)
13568 return (uint32x4_t) (__a < 0.0f);
13571 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13572 vcltzq_f64 (float64x2_t __a)
13574 return (uint64x2_t) (__a < 0.0);
13577 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13578 vcltzq_s8 (int8x16_t __a)
13580 return (uint8x16_t) (__a < 0);
13583 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13584 vcltzq_s16 (int16x8_t __a)
13586 return (uint16x8_t) (__a < 0);
13589 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13590 vcltzq_s32 (int32x4_t __a)
13592 return (uint32x4_t) (__a < 0);
13595 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13596 vcltzq_s64 (int64x2_t __a)
13598 return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
13601 /* vcltz - scalar. */
13603 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13604 vcltzs_f32 (float32_t __a)
13606 return __a < 0.0f ? -1 : 0;
/* Scalar signed compare less-than-zero: all-ones 64-bit mask when
   __a < 0, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64_t __a)
{
  return __a < 0 ? -1ll : 0ll;
}
13615 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13616 vcltzd_f64 (float64_t __a)
13618 return __a < 0.0 ? -1ll : 0ll;
13621 /* vcls. */
13623 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13624 vcls_s8 (int8x8_t __a)
13626 return __builtin_aarch64_clrsbv8qi (__a);
13629 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13630 vcls_s16 (int16x4_t __a)
13632 return __builtin_aarch64_clrsbv4hi (__a);
13635 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13636 vcls_s32 (int32x2_t __a)
13638 return __builtin_aarch64_clrsbv2si (__a);
13641 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13642 vclsq_s8 (int8x16_t __a)
13644 return __builtin_aarch64_clrsbv16qi (__a);
13647 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13648 vclsq_s16 (int16x8_t __a)
13650 return __builtin_aarch64_clrsbv8hi (__a);
13653 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13654 vclsq_s32 (int32x4_t __a)
13656 return __builtin_aarch64_clrsbv4si (__a);
13659 /* vclz. */
13661 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13662 vclz_s8 (int8x8_t __a)
13664 return __builtin_aarch64_clzv8qi (__a);
13667 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13668 vclz_s16 (int16x4_t __a)
13670 return __builtin_aarch64_clzv4hi (__a);
13673 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13674 vclz_s32 (int32x2_t __a)
13676 return __builtin_aarch64_clzv2si (__a);
13679 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13680 vclz_u8 (uint8x8_t __a)
13682 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
13685 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13686 vclz_u16 (uint16x4_t __a)
13688 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
13691 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13692 vclz_u32 (uint32x2_t __a)
13694 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
13697 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13698 vclzq_s8 (int8x16_t __a)
13700 return __builtin_aarch64_clzv16qi (__a);
13703 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13704 vclzq_s16 (int16x8_t __a)
13706 return __builtin_aarch64_clzv8hi (__a);
13709 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13710 vclzq_s32 (int32x4_t __a)
13712 return __builtin_aarch64_clzv4si (__a);
13715 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13716 vclzq_u8 (uint8x16_t __a)
13718 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
13721 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13722 vclzq_u16 (uint16x8_t __a)
13724 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
13727 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13728 vclzq_u32 (uint32x4_t __a)
13730 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
13733 /* vcnt. */
13735 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13736 vcnt_p8 (poly8x8_t __a)
13738 return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
13741 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13742 vcnt_s8 (int8x8_t __a)
13744 return __builtin_aarch64_popcountv8qi (__a);
13747 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13748 vcnt_u8 (uint8x8_t __a)
13750 return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
13753 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13754 vcntq_p8 (poly8x16_t __a)
13756 return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
13759 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13760 vcntq_s8 (int8x16_t __a)
13762 return __builtin_aarch64_popcountv16qi (__a);
13765 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13766 vcntq_u8 (uint8x16_t __a)
13768 return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
13771 /* vcvt (double -> float). */
13773 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13774 vcvt_f32_f64 (float64x2_t __a)
13776 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
13779 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13780 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
13782 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
13785 /* vcvt (float -> double). */
13787 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13788 vcvt_f64_f32 (float32x2_t __a)
13791 return __builtin_aarch64_float_extend_lo_v2df (__a);
13794 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13795 vcvt_high_f64_f32 (float32x4_t __a)
13797 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
13800 /* vcvt (<u>int -> float) */
13802 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13803 vcvtd_f64_s64 (int64_t __a)
13805 return (float64_t) __a;
13808 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13809 vcvtd_f64_u64 (uint64_t __a)
13811 return (float64_t) __a;
13814 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13815 vcvts_f32_s32 (int32_t __a)
13817 return (float32_t) __a;
13820 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13821 vcvts_f32_u32 (uint32_t __a)
13823 return (float32_t) __a;
13826 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13827 vcvt_f32_s32 (int32x2_t __a)
13829 return __builtin_aarch64_floatv2siv2sf (__a);
13832 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13833 vcvt_f32_u32 (uint32x2_t __a)
13835 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
13838 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13839 vcvtq_f32_s32 (int32x4_t __a)
13841 return __builtin_aarch64_floatv4siv4sf (__a);
13844 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13845 vcvtq_f32_u32 (uint32x4_t __a)
13847 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
13850 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13851 vcvtq_f64_s64 (int64x2_t __a)
13853 return __builtin_aarch64_floatv2div2df (__a);
13856 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13857 vcvtq_f64_u64 (uint64x2_t __a)
13859 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
13862 /* vcvt (float -> <u>int) */
13864 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13865 vcvtd_s64_f64 (float64_t __a)
13867 return (int64_t) __a;
13870 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13871 vcvtd_u64_f64 (float64_t __a)
13873 return (uint64_t) __a;
13876 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13877 vcvts_s32_f32 (float32_t __a)
13879 return (int32_t) __a;
13882 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13883 vcvts_u32_f32 (float32_t __a)
13885 return (uint32_t) __a;
13888 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13889 vcvt_s32_f32 (float32x2_t __a)
13891 return __builtin_aarch64_lbtruncv2sfv2si (__a);
13894 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13895 vcvt_u32_f32 (float32x2_t __a)
13897 /* TODO: This cast should go away when builtins have
13898 their correct types. */
13899 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
13902 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13903 vcvtq_s32_f32 (float32x4_t __a)
13905 return __builtin_aarch64_lbtruncv4sfv4si (__a);
13908 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13909 vcvtq_u32_f32 (float32x4_t __a)
13911 /* TODO: This cast should go away when builtins have
13912 their correct types. */
13913 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
13916 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13917 vcvtq_s64_f64 (float64x2_t __a)
13919 return __builtin_aarch64_lbtruncv2dfv2di (__a);
13922 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13923 vcvtq_u64_f64 (float64x2_t __a)
13925 /* TODO: This cast should go away when builtins have
13926 their correct types. */
13927 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
13930 /* vcvta */
13932 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13933 vcvtad_s64_f64 (float64_t __a)
13935 return __builtin_aarch64_lrounddfdi (__a);
13938 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13939 vcvtad_u64_f64 (float64_t __a)
13941 return __builtin_aarch64_lroundudfdi (__a);
13944 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13945 vcvtas_s32_f32 (float32_t __a)
13947 return __builtin_aarch64_lroundsfsi (__a);
13950 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13951 vcvtas_u32_f32 (float32_t __a)
13953 return __builtin_aarch64_lroundusfsi (__a);
13956 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13957 vcvta_s32_f32 (float32x2_t __a)
13959 return __builtin_aarch64_lroundv2sfv2si (__a);
13962 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13963 vcvta_u32_f32 (float32x2_t __a)
13965 /* TODO: This cast should go away when builtins have
13966 their correct types. */
13967 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
13970 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13971 vcvtaq_s32_f32 (float32x4_t __a)
13973 return __builtin_aarch64_lroundv4sfv4si (__a);
13976 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13977 vcvtaq_u32_f32 (float32x4_t __a)
13979 /* TODO: This cast should go away when builtins have
13980 their correct types. */
13981 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
13984 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13985 vcvtaq_s64_f64 (float64x2_t __a)
13987 return __builtin_aarch64_lroundv2dfv2di (__a);
13990 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13991 vcvtaq_u64_f64 (float64x2_t __a)
13993 /* TODO: This cast should go away when builtins have
13994 their correct types. */
13995 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
13998 /* vcvtm */
14000 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14001 vcvtmd_s64_f64 (float64_t __a)
14003 return __builtin_llfloor (__a);
14006 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14007 vcvtmd_u64_f64 (float64_t __a)
14009 return __builtin_aarch64_lfloorudfdi (__a);
14012 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14013 vcvtms_s32_f32 (float32_t __a)
14015 return __builtin_ifloorf (__a);
14018 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14019 vcvtms_u32_f32 (float32_t __a)
14021 return __builtin_aarch64_lfloorusfsi (__a);
14024 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14025 vcvtm_s32_f32 (float32x2_t __a)
14027 return __builtin_aarch64_lfloorv2sfv2si (__a);
14030 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14031 vcvtm_u32_f32 (float32x2_t __a)
14033 /* TODO: This cast should go away when builtins have
14034 their correct types. */
14035 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
14038 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14039 vcvtmq_s32_f32 (float32x4_t __a)
14041 return __builtin_aarch64_lfloorv4sfv4si (__a);
14044 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14045 vcvtmq_u32_f32 (float32x4_t __a)
14047 /* TODO: This cast should go away when builtins have
14048 their correct types. */
14049 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
14052 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14053 vcvtmq_s64_f64 (float64x2_t __a)
14055 return __builtin_aarch64_lfloorv2dfv2di (__a);
14058 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14059 vcvtmq_u64_f64 (float64x2_t __a)
14061 /* TODO: This cast should go away when builtins have
14062 their correct types. */
14063 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
14066 /* vcvtn */
14068 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14069 vcvtnd_s64_f64 (float64_t __a)
14071 return __builtin_aarch64_lfrintndfdi (__a);
14074 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14075 vcvtnd_u64_f64 (float64_t __a)
14077 return __builtin_aarch64_lfrintnudfdi (__a);
14080 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14081 vcvtns_s32_f32 (float32_t __a)
14083 return __builtin_aarch64_lfrintnsfsi (__a);
14086 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14087 vcvtns_u32_f32 (float32_t __a)
14089 return __builtin_aarch64_lfrintnusfsi (__a);
14092 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14093 vcvtn_s32_f32 (float32x2_t __a)
14095 return __builtin_aarch64_lfrintnv2sfv2si (__a);
14098 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14099 vcvtn_u32_f32 (float32x2_t __a)
14101 /* TODO: This cast should go away when builtins have
14102 their correct types. */
14103 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
14106 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14107 vcvtnq_s32_f32 (float32x4_t __a)
14109 return __builtin_aarch64_lfrintnv4sfv4si (__a);
14112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14113 vcvtnq_u32_f32 (float32x4_t __a)
14115 /* TODO: This cast should go away when builtins have
14116 their correct types. */
14117 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
14120 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14121 vcvtnq_s64_f64 (float64x2_t __a)
14123 return __builtin_aarch64_lfrintnv2dfv2di (__a);
14126 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14127 vcvtnq_u64_f64 (float64x2_t __a)
14129 /* TODO: This cast should go away when builtins have
14130 their correct types. */
14131 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
14134 /* vcvtp */
14136 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14137 vcvtpd_s64_f64 (float64_t __a)
14139 return __builtin_llceil (__a);
14142 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14143 vcvtpd_u64_f64 (float64_t __a)
14145 return __builtin_aarch64_lceiludfdi (__a);
14148 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14149 vcvtps_s32_f32 (float32_t __a)
14151 return __builtin_iceilf (__a);
14154 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14155 vcvtps_u32_f32 (float32_t __a)
14157 return __builtin_aarch64_lceilusfsi (__a);
14160 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14161 vcvtp_s32_f32 (float32x2_t __a)
14163 return __builtin_aarch64_lceilv2sfv2si (__a);
14166 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14167 vcvtp_u32_f32 (float32x2_t __a)
14169 /* TODO: This cast should go away when builtins have
14170 their correct types. */
14171 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
14174 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14175 vcvtpq_s32_f32 (float32x4_t __a)
14177 return __builtin_aarch64_lceilv4sfv4si (__a);
14180 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14181 vcvtpq_u32_f32 (float32x4_t __a)
14183 /* TODO: This cast should go away when builtins have
14184 their correct types. */
14185 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
14188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14189 vcvtpq_s64_f64 (float64x2_t __a)
14191 return __builtin_aarch64_lceilv2dfv2di (__a);
14194 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14195 vcvtpq_u64_f64 (float64x2_t __a)
14197 /* TODO: This cast should go away when builtins have
14198 their correct types. */
14199 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
14202 /* vdup_n */
14204 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14205 vdup_n_f32 (float32_t __a)
14207 return (float32x2_t) {__a, __a};
14210 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14211 vdup_n_f64 (float64_t __a)
14213 return (float64x1_t) {__a};
14216 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14217 vdup_n_p8 (poly8_t __a)
14219 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14222 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14223 vdup_n_p16 (poly16_t __a)
14225 return (poly16x4_t) {__a, __a, __a, __a};
14228 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14229 vdup_n_s8 (int8_t __a)
14231 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14234 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14235 vdup_n_s16 (int16_t __a)
14237 return (int16x4_t) {__a, __a, __a, __a};
14240 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14241 vdup_n_s32 (int32_t __a)
14243 return (int32x2_t) {__a, __a};
14246 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14247 vdup_n_s64 (int64_t __a)
14249 return (int64x1_t) {__a};
14252 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14253 vdup_n_u8 (uint8_t __a)
14255 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14258 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14259 vdup_n_u16 (uint16_t __a)
14261 return (uint16x4_t) {__a, __a, __a, __a};
14264 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14265 vdup_n_u32 (uint32_t __a)
14267 return (uint32x2_t) {__a, __a};
14270 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14271 vdup_n_u64 (uint64_t __a)
14273 return (uint64x1_t) {__a};
14276 /* vdupq_n */
14278 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14279 vdupq_n_f32 (float32_t __a)
14281 return (float32x4_t) {__a, __a, __a, __a};
14284 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14285 vdupq_n_f64 (float64_t __a)
14287 return (float64x2_t) {__a, __a};
14290 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14291 vdupq_n_p8 (uint32_t __a)
14293 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
14294 __a, __a, __a, __a, __a, __a, __a, __a};
14297 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14298 vdupq_n_p16 (uint32_t __a)
14300 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14303 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14304 vdupq_n_s8 (int32_t __a)
14306 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
14307 __a, __a, __a, __a, __a, __a, __a, __a};
14310 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14311 vdupq_n_s16 (int32_t __a)
14313 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14317 vdupq_n_s32 (int32_t __a)
14319 return (int32x4_t) {__a, __a, __a, __a};
14322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14323 vdupq_n_s64 (int64_t __a)
14325 return (int64x2_t) {__a, __a};
14328 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14329 vdupq_n_u8 (uint32_t __a)
14331 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
14332 __a, __a, __a, __a, __a, __a, __a, __a};
14335 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14336 vdupq_n_u16 (uint32_t __a)
14338 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
14341 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14342 vdupq_n_u32 (uint32_t __a)
14344 return (uint32x4_t) {__a, __a, __a, __a};
14347 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14348 vdupq_n_u64 (uint64_t __a)
14350 return (uint64x2_t) {__a, __a};
14353 /* vdup_lane */
14355 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14356 vdup_lane_f32 (float32x2_t __a, const int __b)
14358 return __aarch64_vdup_lane_f32 (__a, __b);
14361 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14362 vdup_lane_f64 (float64x1_t __a, const int __b)
14364 return __aarch64_vdup_lane_f64 (__a, __b);
14367 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14368 vdup_lane_p8 (poly8x8_t __a, const int __b)
14370 return __aarch64_vdup_lane_p8 (__a, __b);
14373 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14374 vdup_lane_p16 (poly16x4_t __a, const int __b)
14376 return __aarch64_vdup_lane_p16 (__a, __b);
14379 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14380 vdup_lane_s8 (int8x8_t __a, const int __b)
14382 return __aarch64_vdup_lane_s8 (__a, __b);
14385 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14386 vdup_lane_s16 (int16x4_t __a, const int __b)
14388 return __aarch64_vdup_lane_s16 (__a, __b);
14391 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14392 vdup_lane_s32 (int32x2_t __a, const int __b)
14394 return __aarch64_vdup_lane_s32 (__a, __b);
14397 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14398 vdup_lane_s64 (int64x1_t __a, const int __b)
14400 return __aarch64_vdup_lane_s64 (__a, __b);
14403 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14404 vdup_lane_u8 (uint8x8_t __a, const int __b)
14406 return __aarch64_vdup_lane_u8 (__a, __b);
14409 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14410 vdup_lane_u16 (uint16x4_t __a, const int __b)
14412 return __aarch64_vdup_lane_u16 (__a, __b);
14415 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14416 vdup_lane_u32 (uint32x2_t __a, const int __b)
14418 return __aarch64_vdup_lane_u32 (__a, __b);
14421 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14422 vdup_lane_u64 (uint64x1_t __a, const int __b)
14424 return __aarch64_vdup_lane_u64 (__a, __b);
14427 /* vdup_laneq */
14429 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14430 vdup_laneq_f32 (float32x4_t __a, const int __b)
14432 return __aarch64_vdup_laneq_f32 (__a, __b);
14435 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14436 vdup_laneq_f64 (float64x2_t __a, const int __b)
14438 return __aarch64_vdup_laneq_f64 (__a, __b);
14441 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14442 vdup_laneq_p8 (poly8x16_t __a, const int __b)
14444 return __aarch64_vdup_laneq_p8 (__a, __b);
14447 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14448 vdup_laneq_p16 (poly16x8_t __a, const int __b)
14450 return __aarch64_vdup_laneq_p16 (__a, __b);
14453 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14454 vdup_laneq_s8 (int8x16_t __a, const int __b)
14456 return __aarch64_vdup_laneq_s8 (__a, __b);
14459 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14460 vdup_laneq_s16 (int16x8_t __a, const int __b)
14462 return __aarch64_vdup_laneq_s16 (__a, __b);
14465 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14466 vdup_laneq_s32 (int32x4_t __a, const int __b)
14468 return __aarch64_vdup_laneq_s32 (__a, __b);
14471 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14472 vdup_laneq_s64 (int64x2_t __a, const int __b)
14474 return __aarch64_vdup_laneq_s64 (__a, __b);
14477 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14478 vdup_laneq_u8 (uint8x16_t __a, const int __b)
14480 return __aarch64_vdup_laneq_u8 (__a, __b);
14483 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14484 vdup_laneq_u16 (uint16x8_t __a, const int __b)
14486 return __aarch64_vdup_laneq_u16 (__a, __b);
14489 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14490 vdup_laneq_u32 (uint32x4_t __a, const int __b)
14492 return __aarch64_vdup_laneq_u32 (__a, __b);
14495 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14496 vdup_laneq_u64 (uint64x2_t __a, const int __b)
14498 return __aarch64_vdup_laneq_u64 (__a, __b);
14501 /* vdupq_lane */
14502 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14503 vdupq_lane_f32 (float32x2_t __a, const int __b)
14505 return __aarch64_vdupq_lane_f32 (__a, __b);
14508 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14509 vdupq_lane_f64 (float64x1_t __a, const int __b)
14511 return __aarch64_vdupq_lane_f64 (__a, __b);
14514 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14515 vdupq_lane_p8 (poly8x8_t __a, const int __b)
14517 return __aarch64_vdupq_lane_p8 (__a, __b);
14520 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14521 vdupq_lane_p16 (poly16x4_t __a, const int __b)
14523 return __aarch64_vdupq_lane_p16 (__a, __b);
14526 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14527 vdupq_lane_s8 (int8x8_t __a, const int __b)
14529 return __aarch64_vdupq_lane_s8 (__a, __b);
14532 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14533 vdupq_lane_s16 (int16x4_t __a, const int __b)
14535 return __aarch64_vdupq_lane_s16 (__a, __b);
14538 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14539 vdupq_lane_s32 (int32x2_t __a, const int __b)
14541 return __aarch64_vdupq_lane_s32 (__a, __b);
14544 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14545 vdupq_lane_s64 (int64x1_t __a, const int __b)
14547 return __aarch64_vdupq_lane_s64 (__a, __b);
14550 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14551 vdupq_lane_u8 (uint8x8_t __a, const int __b)
14553 return __aarch64_vdupq_lane_u8 (__a, __b);
14556 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14557 vdupq_lane_u16 (uint16x4_t __a, const int __b)
14559 return __aarch64_vdupq_lane_u16 (__a, __b);
14562 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14563 vdupq_lane_u32 (uint32x2_t __a, const int __b)
14565 return __aarch64_vdupq_lane_u32 (__a, __b);
14568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14569 vdupq_lane_u64 (uint64x1_t __a, const int __b)
14571 return __aarch64_vdupq_lane_u64 (__a, __b);
14574 /* vdupq_laneq */
14575 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14576 vdupq_laneq_f32 (float32x4_t __a, const int __b)
14578 return __aarch64_vdupq_laneq_f32 (__a, __b);
14581 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14582 vdupq_laneq_f64 (float64x2_t __a, const int __b)
14584 return __aarch64_vdupq_laneq_f64 (__a, __b);
14587 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14588 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
14590 return __aarch64_vdupq_laneq_p8 (__a, __b);
14593 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14594 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
14596 return __aarch64_vdupq_laneq_p16 (__a, __b);
14599 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14600 vdupq_laneq_s8 (int8x16_t __a, const int __b)
14602 return __aarch64_vdupq_laneq_s8 (__a, __b);
14605 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14606 vdupq_laneq_s16 (int16x8_t __a, const int __b)
14608 return __aarch64_vdupq_laneq_s16 (__a, __b);
14611 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14612 vdupq_laneq_s32 (int32x4_t __a, const int __b)
14614 return __aarch64_vdupq_laneq_s32 (__a, __b);
14617 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14618 vdupq_laneq_s64 (int64x2_t __a, const int __b)
14620 return __aarch64_vdupq_laneq_s64 (__a, __b);
14623 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14624 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
14626 return __aarch64_vdupq_laneq_u8 (__a, __b);
14629 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14630 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
14632 return __aarch64_vdupq_laneq_u16 (__a, __b);
14635 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14636 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
14638 return __aarch64_vdupq_laneq_u32 (__a, __b);
14641 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14642 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
14644 return __aarch64_vdupq_laneq_u64 (__a, __b);
14647 /* vdupb_lane */
14648 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
14649 vdupb_lane_p8 (poly8x8_t __a, const int __b)
14651 return __aarch64_vget_lane_any (__a, __b);
14654 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
14655 vdupb_lane_s8 (int8x8_t __a, const int __b)
14657 return __aarch64_vget_lane_any (__a, __b);
14660 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14661 vdupb_lane_u8 (uint8x8_t __a, const int __b)
14663 return __aarch64_vget_lane_any (__a, __b);
14666 /* vduph_lane */
14667 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14668 vduph_lane_p16 (poly16x4_t __a, const int __b)
14670 return __aarch64_vget_lane_any (__a, __b);
14673 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14674 vduph_lane_s16 (int16x4_t __a, const int __b)
14676 return __aarch64_vget_lane_any (__a, __b);
14679 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14680 vduph_lane_u16 (uint16x4_t __a, const int __b)
14682 return __aarch64_vget_lane_any (__a, __b);
14685 /* vdups_lane */
14686 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14687 vdups_lane_f32 (float32x2_t __a, const int __b)
14689 return __aarch64_vget_lane_any (__a, __b);
14692 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14693 vdups_lane_s32 (int32x2_t __a, const int __b)
14695 return __aarch64_vget_lane_any (__a, __b);
14698 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14699 vdups_lane_u32 (uint32x2_t __a, const int __b)
14701 return __aarch64_vget_lane_any (__a, __b);
14704 /* vdupd_lane */
14705 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14706 vdupd_lane_f64 (float64x1_t __a, const int __b)
14708 __AARCH64_LANE_CHECK (__a, __b);
14709 return __a[0];
14712 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14713 vdupd_lane_s64 (int64x1_t __a, const int __b)
14715 __AARCH64_LANE_CHECK (__a, __b);
14716 return __a[0];
14719 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14720 vdupd_lane_u64 (uint64x1_t __a, const int __b)
14722 __AARCH64_LANE_CHECK (__a, __b);
14723 return __a[0];
14726 /* vdupb_laneq */
14727 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
14728 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
14730 return __aarch64_vget_lane_any (__a, __b);
14733 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
14734 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
14736 return __aarch64_vget_lane_any (__a, __b);
14739 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14740 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
14742 return __aarch64_vget_lane_any (__a, __b);
14745 /* vduph_laneq */
14746 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14747 vduph_laneq_p16 (poly16x8_t __a, const int __b)
14749 return __aarch64_vget_lane_any (__a, __b);
14752 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14753 vduph_laneq_s16 (int16x8_t __a, const int __b)
14755 return __aarch64_vget_lane_any (__a, __b);
14758 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14759 vduph_laneq_u16 (uint16x8_t __a, const int __b)
14761 return __aarch64_vget_lane_any (__a, __b);
14764 /* vdups_laneq */
14765 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14766 vdups_laneq_f32 (float32x4_t __a, const int __b)
14768 return __aarch64_vget_lane_any (__a, __b);
14771 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14772 vdups_laneq_s32 (int32x4_t __a, const int __b)
14774 return __aarch64_vget_lane_any (__a, __b);
14777 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14778 vdups_laneq_u32 (uint32x4_t __a, const int __b)
14780 return __aarch64_vget_lane_any (__a, __b);
14783 /* vdupd_laneq */
14784 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14785 vdupd_laneq_f64 (float64x2_t __a, const int __b)
14787 return __aarch64_vget_lane_any (__a, __b);
14790 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14791 vdupd_laneq_s64 (int64x2_t __a, const int __b)
14793 return __aarch64_vget_lane_any (__a, __b);
14796 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14797 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
14799 return __aarch64_vget_lane_any (__a, __b);
14802 /* vext */
14804 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14805 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
14807 __AARCH64_LANE_CHECK (__a, __c);
14808 #ifdef __AARCH64EB__
14809 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14810 #else
14811 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14812 #endif
14815 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14816 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
14818 __AARCH64_LANE_CHECK (__a, __c);
14819 /* The only possible index to the assembler instruction returns element 0. */
14820 return __a;
14822 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14823 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
14825 __AARCH64_LANE_CHECK (__a, __c);
14826 #ifdef __AARCH64EB__
14827 return __builtin_shuffle (__b, __a, (uint8x8_t)
14828 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14829 #else
14830 return __builtin_shuffle (__a, __b,
14831 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14832 #endif
14835 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14836 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
14838 __AARCH64_LANE_CHECK (__a, __c);
14839 #ifdef __AARCH64EB__
14840 return __builtin_shuffle (__b, __a,
14841 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14842 #else
14843 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14844 #endif
14847 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14848 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
14850 __AARCH64_LANE_CHECK (__a, __c);
14851 #ifdef __AARCH64EB__
14852 return __builtin_shuffle (__b, __a, (uint8x8_t)
14853 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14854 #else
14855 return __builtin_shuffle (__a, __b,
14856 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14857 #endif
14860 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14861 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
14863 __AARCH64_LANE_CHECK (__a, __c);
14864 #ifdef __AARCH64EB__
14865 return __builtin_shuffle (__b, __a,
14866 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14867 #else
14868 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14869 #endif
14872 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14873 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
14875 __AARCH64_LANE_CHECK (__a, __c);
14876 #ifdef __AARCH64EB__
14877 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14878 #else
14879 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14880 #endif
14883 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14884 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
14886 __AARCH64_LANE_CHECK (__a, __c);
14887 /* The only possible index to the assembler instruction returns element 0. */
14888 return __a;
14891 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14892 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
14894 __AARCH64_LANE_CHECK (__a, __c);
14895 #ifdef __AARCH64EB__
14896 return __builtin_shuffle (__b, __a, (uint8x8_t)
14897 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14898 #else
14899 return __builtin_shuffle (__a, __b,
14900 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14901 #endif
14904 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14905 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
14907 __AARCH64_LANE_CHECK (__a, __c);
14908 #ifdef __AARCH64EB__
14909 return __builtin_shuffle (__b, __a,
14910 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14911 #else
14912 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14913 #endif
14916 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14917 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
14919 __AARCH64_LANE_CHECK (__a, __c);
14920 #ifdef __AARCH64EB__
14921 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14922 #else
14923 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14924 #endif
14927 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14928 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
14930 __AARCH64_LANE_CHECK (__a, __c);
14931 /* The only possible index to the assembler instruction returns element 0. */
14932 return __a;
14935 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14936 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
14938 __AARCH64_LANE_CHECK (__a, __c);
14939 #ifdef __AARCH64EB__
14940 return __builtin_shuffle (__b, __a,
14941 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14942 #else
14943 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14944 #endif
14947 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14948 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
14950 __AARCH64_LANE_CHECK (__a, __c);
14951 #ifdef __AARCH64EB__
14952 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14953 #else
14954 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14955 #endif
14958 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14959 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
14961 __AARCH64_LANE_CHECK (__a, __c);
14962 #ifdef __AARCH64EB__
14963 return __builtin_shuffle (__b, __a, (uint8x16_t)
14964 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14965 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14966 #else
14967 return __builtin_shuffle (__a, __b, (uint8x16_t)
14968 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14969 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14970 #endif
14973 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14974 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
14976 __AARCH64_LANE_CHECK (__a, __c);
14977 #ifdef __AARCH64EB__
14978 return __builtin_shuffle (__b, __a, (uint16x8_t)
14979 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14980 #else
14981 return __builtin_shuffle (__a, __b,
14982 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14983 #endif
14986 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14987 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
14989 __AARCH64_LANE_CHECK (__a, __c);
14990 #ifdef __AARCH64EB__
14991 return __builtin_shuffle (__b, __a, (uint8x16_t)
14992 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14993 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14994 #else
14995 return __builtin_shuffle (__a, __b, (uint8x16_t)
14996 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14997 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14998 #endif
15001 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15002 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
15004 __AARCH64_LANE_CHECK (__a, __c);
15005 #ifdef __AARCH64EB__
15006 return __builtin_shuffle (__b, __a, (uint16x8_t)
15007 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15008 #else
15009 return __builtin_shuffle (__a, __b,
15010 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15011 #endif
15014 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15015 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
15017 __AARCH64_LANE_CHECK (__a, __c);
15018 #ifdef __AARCH64EB__
15019 return __builtin_shuffle (__b, __a,
15020 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15021 #else
15022 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
15023 #endif
15026 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15027 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
15029 __AARCH64_LANE_CHECK (__a, __c);
15030 #ifdef __AARCH64EB__
15031 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
15032 #else
15033 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
15034 #endif
15037 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15038 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
15040 __AARCH64_LANE_CHECK (__a, __c);
15041 #ifdef __AARCH64EB__
15042 return __builtin_shuffle (__b, __a, (uint8x16_t)
15043 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
15044 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
15045 #else
15046 return __builtin_shuffle (__a, __b, (uint8x16_t)
15047 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
15048 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
15049 #endif
15052 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15053 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
15055 __AARCH64_LANE_CHECK (__a, __c);
15056 #ifdef __AARCH64EB__
15057 return __builtin_shuffle (__b, __a, (uint16x8_t)
15058 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15059 #else
15060 return __builtin_shuffle (__a, __b,
15061 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15062 #endif
15065 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15066 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
15068 __AARCH64_LANE_CHECK (__a, __c);
15069 #ifdef __AARCH64EB__
15070 return __builtin_shuffle (__b, __a,
15071 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15072 #else
15073 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
15074 #endif
15077 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15078 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
15080 __AARCH64_LANE_CHECK (__a, __c);
15081 #ifdef __AARCH64EB__
15082 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
15083 #else
15084 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
15085 #endif
15088 /* vfma */
15090 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15091 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
15093 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
15096 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15097 vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
15099 return __builtin_aarch64_fmav2sf (__b, __c, __a);
15102 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15103 vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
15105 return __builtin_aarch64_fmav4sf (__b, __c, __a);
15108 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15109 vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
15111 return __builtin_aarch64_fmav2df (__b, __c, __a);
15114 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15115 vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
15117 return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
15120 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15121 vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
15123 return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a);
15126 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15127 vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
15129 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
15132 /* vfma_lane */
15134 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15135 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
15136 float32x2_t __c, const int __lane)
15138 return __builtin_aarch64_fmav2sf (__b,
15139 __aarch64_vdup_lane_f32 (__c, __lane),
15140 __a);
15143 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15144 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
15145 float64x1_t __c, const int __lane)
15147 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
15150 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15151 vfmad_lane_f64 (float64_t __a, float64_t __b,
15152 float64x1_t __c, const int __lane)
15154 return __builtin_fma (__b, __c[0], __a);
15157 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15158 vfmas_lane_f32 (float32_t __a, float32_t __b,
15159 float32x2_t __c, const int __lane)
15161 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15164 /* vfma_laneq */
15166 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15167 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
15168 float32x4_t __c, const int __lane)
15170 return __builtin_aarch64_fmav2sf (__b,
15171 __aarch64_vdup_laneq_f32 (__c, __lane),
15172 __a);
15175 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15176 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
15177 float64x2_t __c, const int __lane)
15179 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
15180 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
15183 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15184 vfmad_laneq_f64 (float64_t __a, float64_t __b,
15185 float64x2_t __c, const int __lane)
15187 return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15190 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15191 vfmas_laneq_f32 (float32_t __a, float32_t __b,
15192 float32x4_t __c, const int __lane)
15194 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15197 /* vfmaq_lane */
15199 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15200 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
15201 float32x2_t __c, const int __lane)
15203 return __builtin_aarch64_fmav4sf (__b,
15204 __aarch64_vdupq_lane_f32 (__c, __lane),
15205 __a);
15208 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15209 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
15210 float64x1_t __c, const int __lane)
15212 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
15215 /* vfmaq_laneq */
15217 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15218 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
15219 float32x4_t __c, const int __lane)
15221 return __builtin_aarch64_fmav4sf (__b,
15222 __aarch64_vdupq_laneq_f32 (__c, __lane),
15223 __a);
15226 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15227 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
15228 float64x2_t __c, const int __lane)
15230 return __builtin_aarch64_fmav2df (__b,
15231 __aarch64_vdupq_laneq_f64 (__c, __lane),
15232 __a);
15235 /* vfms */
15237 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15238 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
15240 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
15243 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15244 vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
15246 return __builtin_aarch64_fmav2sf (-__b, __c, __a);
15249 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15250 vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
15252 return __builtin_aarch64_fmav4sf (-__b, __c, __a);
15255 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15256 vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
15258 return __builtin_aarch64_fmav2df (-__b, __c, __a);
15262 /* vfms_lane */
15264 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15265 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
15266 float32x2_t __c, const int __lane)
15268 return __builtin_aarch64_fmav2sf (-__b,
15269 __aarch64_vdup_lane_f32 (__c, __lane),
15270 __a);
15273 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15274 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
15275 float64x1_t __c, const int __lane)
15277 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
15280 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15281 vfmsd_lane_f64 (float64_t __a, float64_t __b,
15282 float64x1_t __c, const int __lane)
15284 return __builtin_fma (-__b, __c[0], __a);
15287 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15288 vfmss_lane_f32 (float32_t __a, float32_t __b,
15289 float32x2_t __c, const int __lane)
15291 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
15294 /* vfms_laneq */
15296 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15297 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
15298 float32x4_t __c, const int __lane)
15300 return __builtin_aarch64_fmav2sf (-__b,
15301 __aarch64_vdup_laneq_f32 (__c, __lane),
15302 __a);
15305 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15306 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
15307 float64x2_t __c, const int __lane)
15309 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
15310 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
15313 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15314 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
15315 float64x2_t __c, const int __lane)
15317 return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
15320 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15321 vfmss_laneq_f32 (float32_t __a, float32_t __b,
15322 float32x4_t __c, const int __lane)
15324 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
15327 /* vfmsq_lane */
15329 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15330 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
15331 float32x2_t __c, const int __lane)
15333 return __builtin_aarch64_fmav4sf (-__b,
15334 __aarch64_vdupq_lane_f32 (__c, __lane),
15335 __a);
15338 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15339 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
15340 float64x1_t __c, const int __lane)
15342 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
15345 /* vfmsq_laneq */
15347 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15348 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
15349 float32x4_t __c, const int __lane)
15351 return __builtin_aarch64_fmav4sf (-__b,
15352 __aarch64_vdupq_laneq_f32 (__c, __lane),
15353 __a);
15356 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15357 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
15358 float64x2_t __c, const int __lane)
15360 return __builtin_aarch64_fmav2df (-__b,
15361 __aarch64_vdupq_laneq_f64 (__c, __lane),
15362 __a);
15365 /* vld1 */
15367 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15368 vld1_f32 (const float32_t *a)
15370 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
15373 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15374 vld1_f64 (const float64_t *a)
15376 return (float64x1_t) {*a};
15379 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15380 vld1_p8 (const poly8_t *a)
15382 return (poly8x8_t)
15383 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15386 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15387 vld1_p16 (const poly16_t *a)
15389 return (poly16x4_t)
15390 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15393 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15394 vld1_s8 (const int8_t *a)
15396 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15400 vld1_s16 (const int16_t *a)
15402 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15405 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15406 vld1_s32 (const int32_t *a)
15408 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
15411 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15412 vld1_s64 (const int64_t *a)
15414 return (int64x1_t) {*a};
15417 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15418 vld1_u8 (const uint8_t *a)
15420 return (uint8x8_t)
15421 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15424 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15425 vld1_u16 (const uint16_t *a)
15427 return (uint16x4_t)
15428 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15431 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15432 vld1_u32 (const uint32_t *a)
15434 return (uint32x2_t)
15435 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
15438 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15439 vld1_u64 (const uint64_t *a)
15441 return (uint64x1_t) {*a};
15444 /* vld1q */
15446 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15447 vld1q_f32 (const float32_t *a)
15449 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
15452 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15453 vld1q_f64 (const float64_t *a)
15455 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
15458 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15459 vld1q_p8 (const poly8_t *a)
15461 return (poly8x16_t)
15462 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15465 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15466 vld1q_p16 (const poly16_t *a)
15468 return (poly16x8_t)
15469 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15472 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15473 vld1q_s8 (const int8_t *a)
15475 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15479 vld1q_s16 (const int16_t *a)
15481 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15484 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15485 vld1q_s32 (const int32_t *a)
15487 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
15490 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15491 vld1q_s64 (const int64_t *a)
15493 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
15496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15497 vld1q_u8 (const uint8_t *a)
15499 return (uint8x16_t)
15500 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15503 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15504 vld1q_u16 (const uint16_t *a)
15506 return (uint16x8_t)
15507 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15510 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15511 vld1q_u32 (const uint32_t *a)
15513 return (uint32x4_t)
15514 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
15517 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15518 vld1q_u64 (const uint64_t *a)
15520 return (uint64x2_t)
15521 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
15524 /* vld1_dup */
15526 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15527 vld1_dup_f32 (const float32_t* __a)
15529 return vdup_n_f32 (*__a);
15532 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15533 vld1_dup_f64 (const float64_t* __a)
15535 return vdup_n_f64 (*__a);
15538 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15539 vld1_dup_p8 (const poly8_t* __a)
15541 return vdup_n_p8 (*__a);
15544 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15545 vld1_dup_p16 (const poly16_t* __a)
15547 return vdup_n_p16 (*__a);
15550 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15551 vld1_dup_s8 (const int8_t* __a)
15553 return vdup_n_s8 (*__a);
15556 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15557 vld1_dup_s16 (const int16_t* __a)
15559 return vdup_n_s16 (*__a);
15562 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15563 vld1_dup_s32 (const int32_t* __a)
15565 return vdup_n_s32 (*__a);
15568 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15569 vld1_dup_s64 (const int64_t* __a)
15571 return vdup_n_s64 (*__a);
15574 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15575 vld1_dup_u8 (const uint8_t* __a)
15577 return vdup_n_u8 (*__a);
15580 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15581 vld1_dup_u16 (const uint16_t* __a)
15583 return vdup_n_u16 (*__a);
15586 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15587 vld1_dup_u32 (const uint32_t* __a)
15589 return vdup_n_u32 (*__a);
15592 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15593 vld1_dup_u64 (const uint64_t* __a)
15595 return vdup_n_u64 (*__a);
15598 /* vld1q_dup */
15600 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15601 vld1q_dup_f32 (const float32_t* __a)
15603 return vdupq_n_f32 (*__a);
15606 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15607 vld1q_dup_f64 (const float64_t* __a)
15609 return vdupq_n_f64 (*__a);
15612 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15613 vld1q_dup_p8 (const poly8_t* __a)
15615 return vdupq_n_p8 (*__a);
15618 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15619 vld1q_dup_p16 (const poly16_t* __a)
15621 return vdupq_n_p16 (*__a);
15624 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15625 vld1q_dup_s8 (const int8_t* __a)
15627 return vdupq_n_s8 (*__a);
15630 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15631 vld1q_dup_s16 (const int16_t* __a)
15633 return vdupq_n_s16 (*__a);
15636 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15637 vld1q_dup_s32 (const int32_t* __a)
15639 return vdupq_n_s32 (*__a);
15642 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15643 vld1q_dup_s64 (const int64_t* __a)
15645 return vdupq_n_s64 (*__a);
15648 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15649 vld1q_dup_u8 (const uint8_t* __a)
15651 return vdupq_n_u8 (*__a);
15654 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15655 vld1q_dup_u16 (const uint16_t* __a)
15657 return vdupq_n_u16 (*__a);
15660 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15661 vld1q_dup_u32 (const uint32_t* __a)
15663 return vdupq_n_u32 (*__a);
15666 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15667 vld1q_dup_u64 (const uint64_t* __a)
15669 return vdupq_n_u64 (*__a);
15672 /* vld1_lane */
15674 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15675 vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
15677 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15680 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15681 vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
15683 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15686 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15687 vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
15689 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15692 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15693 vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
15695 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15698 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15699 vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
15701 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15704 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15705 vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
15707 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15710 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15711 vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
15713 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15716 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15717 vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
15719 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15722 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15723 vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
15725 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15728 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15729 vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
15731 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15735 vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
15737 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15740 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15741 vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
15743 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15746 /* vld1q_lane */
15748 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15749 vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
15751 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15754 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15755 vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
15757 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15760 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15761 vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
15763 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15766 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15767 vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
15769 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15772 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15773 vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
15775 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15778 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15779 vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
15781 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15784 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15785 vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
15787 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15790 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15791 vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
15793 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15796 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15797 vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
15799 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15802 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15803 vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
15805 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15808 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15809 vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
15811 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15814 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15815 vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
15817 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15820 /* vldn */
15822 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
15823 vld2_s64 (const int64_t * __a)
15825 int64x1x2_t ret;
15826 __builtin_aarch64_simd_oi __o;
15827 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15828 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15829 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15830 return ret;
15833 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
15834 vld2_u64 (const uint64_t * __a)
15836 uint64x1x2_t ret;
15837 __builtin_aarch64_simd_oi __o;
15838 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15839 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15840 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15841 return ret;
15844 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
15845 vld2_f64 (const float64_t * __a)
15847 float64x1x2_t ret;
15848 __builtin_aarch64_simd_oi __o;
15849 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
15850 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
15851 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
15852 return ret;
15855 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
15856 vld2_s8 (const int8_t * __a)
15858 int8x8x2_t ret;
15859 __builtin_aarch64_simd_oi __o;
15860 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15861 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15862 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15863 return ret;
15866 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
15867 vld2_p8 (const poly8_t * __a)
15869 poly8x8x2_t ret;
15870 __builtin_aarch64_simd_oi __o;
15871 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15872 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15873 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15874 return ret;
15877 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
15878 vld2_s16 (const int16_t * __a)
15880 int16x4x2_t ret;
15881 __builtin_aarch64_simd_oi __o;
15882 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15883 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15884 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15885 return ret;
15888 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
15889 vld2_p16 (const poly16_t * __a)
15891 poly16x4x2_t ret;
15892 __builtin_aarch64_simd_oi __o;
15893 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15894 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15895 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15896 return ret;
15899 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
15900 vld2_s32 (const int32_t * __a)
15902 int32x2x2_t ret;
15903 __builtin_aarch64_simd_oi __o;
15904 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15905 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15906 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15907 return ret;
15910 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
15911 vld2_u8 (const uint8_t * __a)
15913 uint8x8x2_t ret;
15914 __builtin_aarch64_simd_oi __o;
15915 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15916 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15917 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15918 return ret;
15921 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
15922 vld2_u16 (const uint16_t * __a)
15924 uint16x4x2_t ret;
15925 __builtin_aarch64_simd_oi __o;
15926 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15927 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15928 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15929 return ret;
15932 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
15933 vld2_u32 (const uint32_t * __a)
15935 uint32x2x2_t ret;
15936 __builtin_aarch64_simd_oi __o;
15937 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15938 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15939 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15940 return ret;
15943 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
15944 vld2_f32 (const float32_t * __a)
15946 float32x2x2_t ret;
15947 __builtin_aarch64_simd_oi __o;
15948 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
15949 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
15950 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
15951 return ret;
15954 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
15955 vld2q_s8 (const int8_t * __a)
15957 int8x16x2_t ret;
15958 __builtin_aarch64_simd_oi __o;
15959 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15960 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15961 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15962 return ret;
15965 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
15966 vld2q_p8 (const poly8_t * __a)
15968 poly8x16x2_t ret;
15969 __builtin_aarch64_simd_oi __o;
15970 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15971 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15972 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15973 return ret;
15976 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
15977 vld2q_s16 (const int16_t * __a)
15979 int16x8x2_t ret;
15980 __builtin_aarch64_simd_oi __o;
15981 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15982 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15983 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15984 return ret;
15987 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
15988 vld2q_p16 (const poly16_t * __a)
15990 poly16x8x2_t ret;
15991 __builtin_aarch64_simd_oi __o;
15992 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15993 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15994 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15995 return ret;
15998 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
15999 vld2q_s32 (const int32_t * __a)
16001 int32x4x2_t ret;
16002 __builtin_aarch64_simd_oi __o;
16003 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16004 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16005 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16006 return ret;
16009 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16010 vld2q_s64 (const int64_t * __a)
16012 int64x2x2_t ret;
16013 __builtin_aarch64_simd_oi __o;
16014 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16015 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16016 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16017 return ret;
16020 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16021 vld2q_u8 (const uint8_t * __a)
16023 uint8x16x2_t ret;
16024 __builtin_aarch64_simd_oi __o;
16025 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
16026 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16027 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16028 return ret;
16031 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16032 vld2q_u16 (const uint16_t * __a)
16034 uint16x8x2_t ret;
16035 __builtin_aarch64_simd_oi __o;
16036 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
16037 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16038 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16039 return ret;
16042 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16043 vld2q_u32 (const uint32_t * __a)
16045 uint32x4x2_t ret;
16046 __builtin_aarch64_simd_oi __o;
16047 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16048 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16049 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16050 return ret;
16053 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16054 vld2q_u64 (const uint64_t * __a)
16056 uint64x2x2_t ret;
16057 __builtin_aarch64_simd_oi __o;
16058 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16059 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16060 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16061 return ret;
16064 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16065 vld2q_f32 (const float32_t * __a)
16067 float32x4x2_t ret;
16068 __builtin_aarch64_simd_oi __o;
16069 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
16070 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16071 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16072 return ret;
16075 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16076 vld2q_f64 (const float64_t * __a)
16078 float64x2x2_t ret;
16079 __builtin_aarch64_simd_oi __o;
16080 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
16081 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16082 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16083 return ret;
16086 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16087 vld3_s64 (const int64_t * __a)
16089 int64x1x3_t ret;
16090 __builtin_aarch64_simd_ci __o;
16091 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16092 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16093 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16094 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16095 return ret;
16098 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16099 vld3_u64 (const uint64_t * __a)
16101 uint64x1x3_t ret;
16102 __builtin_aarch64_simd_ci __o;
16103 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16104 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16105 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16106 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16107 return ret;
16110 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16111 vld3_f64 (const float64_t * __a)
16113 float64x1x3_t ret;
16114 __builtin_aarch64_simd_ci __o;
16115 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
16116 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16117 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16118 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16119 return ret;
16122 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16123 vld3_s8 (const int8_t * __a)
16125 int8x8x3_t ret;
16126 __builtin_aarch64_simd_ci __o;
16127 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16128 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16129 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16130 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16131 return ret;
16134 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16135 vld3_p8 (const poly8_t * __a)
16137 poly8x8x3_t ret;
16138 __builtin_aarch64_simd_ci __o;
16139 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16140 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16141 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16142 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16143 return ret;
16146 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16147 vld3_s16 (const int16_t * __a)
16149 int16x4x3_t ret;
16150 __builtin_aarch64_simd_ci __o;
16151 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16152 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16153 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16154 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16155 return ret;
16158 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16159 vld3_p16 (const poly16_t * __a)
16161 poly16x4x3_t ret;
16162 __builtin_aarch64_simd_ci __o;
16163 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16164 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16165 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16166 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16167 return ret;
16170 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16171 vld3_s32 (const int32_t * __a)
16173 int32x2x3_t ret;
16174 __builtin_aarch64_simd_ci __o;
16175 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16176 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16177 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16178 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16179 return ret;
16182 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16183 vld3_u8 (const uint8_t * __a)
16185 uint8x8x3_t ret;
16186 __builtin_aarch64_simd_ci __o;
16187 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16188 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16189 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16190 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16191 return ret;
16194 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16195 vld3_u16 (const uint16_t * __a)
16197 uint16x4x3_t ret;
16198 __builtin_aarch64_simd_ci __o;
16199 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16200 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16201 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16202 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16203 return ret;
16206 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16207 vld3_u32 (const uint32_t * __a)
16209 uint32x2x3_t ret;
16210 __builtin_aarch64_simd_ci __o;
16211 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16212 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16213 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16214 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16215 return ret;
16218 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16219 vld3_f32 (const float32_t * __a)
16221 float32x2x3_t ret;
16222 __builtin_aarch64_simd_ci __o;
16223 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
16224 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16225 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16226 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16227 return ret;
16230 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16231 vld3q_s8 (const int8_t * __a)
16233 int8x16x3_t ret;
16234 __builtin_aarch64_simd_ci __o;
16235 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16236 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16237 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16238 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16239 return ret;
16242 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16243 vld3q_p8 (const poly8_t * __a)
16245 poly8x16x3_t ret;
16246 __builtin_aarch64_simd_ci __o;
16247 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16248 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16249 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16250 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16251 return ret;
16254 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16255 vld3q_s16 (const int16_t * __a)
16257 int16x8x3_t ret;
16258 __builtin_aarch64_simd_ci __o;
16259 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16260 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16261 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16262 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16263 return ret;
16266 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16267 vld3q_p16 (const poly16_t * __a)
16269 poly16x8x3_t ret;
16270 __builtin_aarch64_simd_ci __o;
16271 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16272 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16273 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16274 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16275 return ret;
16278 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16279 vld3q_s32 (const int32_t * __a)
16281 int32x4x3_t ret;
16282 __builtin_aarch64_simd_ci __o;
16283 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16284 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16285 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16286 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16287 return ret;
16290 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16291 vld3q_s64 (const int64_t * __a)
16293 int64x2x3_t ret;
16294 __builtin_aarch64_simd_ci __o;
16295 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16296 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16297 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16298 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16299 return ret;
16302 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16303 vld3q_u8 (const uint8_t * __a)
16305 uint8x16x3_t ret;
16306 __builtin_aarch64_simd_ci __o;
16307 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16308 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16309 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16310 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16311 return ret;
16314 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16315 vld3q_u16 (const uint16_t * __a)
16317 uint16x8x3_t ret;
16318 __builtin_aarch64_simd_ci __o;
16319 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16320 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16321 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16322 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16323 return ret;
16326 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16327 vld3q_u32 (const uint32_t * __a)
16329 uint32x4x3_t ret;
16330 __builtin_aarch64_simd_ci __o;
16331 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16332 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16333 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16334 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16335 return ret;
16338 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16339 vld3q_u64 (const uint64_t * __a)
16341 uint64x2x3_t ret;
16342 __builtin_aarch64_simd_ci __o;
16343 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16344 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16345 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16346 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16347 return ret;
16350 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
16351 vld3q_f32 (const float32_t * __a)
16353 float32x4x3_t ret;
16354 __builtin_aarch64_simd_ci __o;
16355 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
16356 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
16357 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
16358 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
16359 return ret;
16362 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
16363 vld3q_f64 (const float64_t * __a)
16365 float64x2x3_t ret;
16366 __builtin_aarch64_simd_ci __o;
16367 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
16368 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
16369 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
16370 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
16371 return ret;
16374 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
16375 vld4_s64 (const int64_t * __a)
16377 int64x1x4_t ret;
16378 __builtin_aarch64_simd_xi __o;
16379 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
16380 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16381 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16382 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16383 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16384 return ret;
16387 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
16388 vld4_u64 (const uint64_t * __a)
16390 uint64x1x4_t ret;
16391 __builtin_aarch64_simd_xi __o;
16392 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
16393 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16394 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16395 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16396 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16397 return ret;
16400 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
16401 vld4_f64 (const float64_t * __a)
16403 float64x1x4_t ret;
16404 __builtin_aarch64_simd_xi __o;
16405 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
16406 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
16407 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
16408 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
16409 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
16410 return ret;
16413 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
16414 vld4_s8 (const int8_t * __a)
16416 int8x8x4_t ret;
16417 __builtin_aarch64_simd_xi __o;
16418 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16419 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16420 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16421 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16422 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16423 return ret;
16426 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
16427 vld4_p8 (const poly8_t * __a)
16429 poly8x8x4_t ret;
16430 __builtin_aarch64_simd_xi __o;
16431 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16432 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16433 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16434 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16435 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16436 return ret;
16439 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
16440 vld4_s16 (const int16_t * __a)
16442 int16x4x4_t ret;
16443 __builtin_aarch64_simd_xi __o;
16444 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16445 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16446 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16447 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16448 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16449 return ret;
16452 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
16453 vld4_p16 (const poly16_t * __a)
16455 poly16x4x4_t ret;
16456 __builtin_aarch64_simd_xi __o;
16457 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16458 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16459 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16460 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16461 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16462 return ret;
16465 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
16466 vld4_s32 (const int32_t * __a)
16468 int32x2x4_t ret;
16469 __builtin_aarch64_simd_xi __o;
16470 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
16471 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16472 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16473 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16474 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16475 return ret;
16478 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
16479 vld4_u8 (const uint8_t * __a)
16481 uint8x8x4_t ret;
16482 __builtin_aarch64_simd_xi __o;
16483 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16484 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16485 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16486 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16487 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16488 return ret;
16491 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
16492 vld4_u16 (const uint16_t * __a)
16494 uint16x4x4_t ret;
16495 __builtin_aarch64_simd_xi __o;
16496 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16497 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16498 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16499 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16500 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16501 return ret;
16504 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
16505 vld4_u32 (const uint32_t * __a)
16507 uint32x2x4_t ret;
16508 __builtin_aarch64_simd_xi __o;
16509 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
16510 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16511 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16512 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16513 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16514 return ret;
16517 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
16518 vld4_f32 (const float32_t * __a)
16520 float32x2x4_t ret;
16521 __builtin_aarch64_simd_xi __o;
16522 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
16523 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
16524 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
16525 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
16526 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
16527 return ret;
16530 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
16531 vld4q_s8 (const int8_t * __a)
16533 int8x16x4_t ret;
16534 __builtin_aarch64_simd_xi __o;
16535 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16536 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16537 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16538 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16539 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16540 return ret;
16543 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
16544 vld4q_p8 (const poly8_t * __a)
16546 poly8x16x4_t ret;
16547 __builtin_aarch64_simd_xi __o;
16548 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16549 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16550 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16551 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16552 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16553 return ret;
16556 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
16557 vld4q_s16 (const int16_t * __a)
16559 int16x8x4_t ret;
16560 __builtin_aarch64_simd_xi __o;
16561 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16562 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16563 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16564 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16565 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16566 return ret;
16569 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16570 vld4q_p16 (const poly16_t * __a)
16572 poly16x8x4_t ret;
16573 __builtin_aarch64_simd_xi __o;
16574 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16575 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16576 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16577 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16578 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16579 return ret;
16582 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16583 vld4q_s32 (const int32_t * __a)
16585 int32x4x4_t ret;
16586 __builtin_aarch64_simd_xi __o;
16587 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16588 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16589 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16590 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16591 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16592 return ret;
16595 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16596 vld4q_s64 (const int64_t * __a)
16598 int64x2x4_t ret;
16599 __builtin_aarch64_simd_xi __o;
16600 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16601 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16602 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16603 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16604 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16605 return ret;
16608 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16609 vld4q_u8 (const uint8_t * __a)
16611 uint8x16x4_t ret;
16612 __builtin_aarch64_simd_xi __o;
16613 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16614 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16615 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16616 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16617 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16618 return ret;
16621 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16622 vld4q_u16 (const uint16_t * __a)
16624 uint16x8x4_t ret;
16625 __builtin_aarch64_simd_xi __o;
16626 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16627 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16628 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16629 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16630 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16631 return ret;
16634 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
16635 vld4q_u32 (const uint32_t * __a)
16637 uint32x4x4_t ret;
16638 __builtin_aarch64_simd_xi __o;
16639 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16640 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16641 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16642 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16643 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16644 return ret;
16647 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
16648 vld4q_u64 (const uint64_t * __a)
16650 uint64x2x4_t ret;
16651 __builtin_aarch64_simd_xi __o;
16652 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16653 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16654 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16655 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16656 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16657 return ret;
16660 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
16661 vld4q_f32 (const float32_t * __a)
16663 float32x4x4_t ret;
16664 __builtin_aarch64_simd_xi __o;
16665 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
16666 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
16667 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
16668 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
16669 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
16670 return ret;
16673 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
16674 vld4q_f64 (const float64_t * __a)
16676 float64x2x4_t ret;
16677 __builtin_aarch64_simd_xi __o;
16678 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
16679 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
16680 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
16681 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
16682 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
16683 return ret;
/* vldn_dup */
16688 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
16689 vld2_dup_s8 (const int8_t * __a)
16691 int8x8x2_t ret;
16692 __builtin_aarch64_simd_oi __o;
16693 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16694 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16695 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16696 return ret;
16699 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
16700 vld2_dup_s16 (const int16_t * __a)
16702 int16x4x2_t ret;
16703 __builtin_aarch64_simd_oi __o;
16704 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16705 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16706 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16707 return ret;
16710 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
16711 vld2_dup_s32 (const int32_t * __a)
16713 int32x2x2_t ret;
16714 __builtin_aarch64_simd_oi __o;
16715 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16716 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16717 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16718 return ret;
16721 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
16722 vld2_dup_f32 (const float32_t * __a)
16724 float32x2x2_t ret;
16725 __builtin_aarch64_simd_oi __o;
16726 __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16727 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
16728 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
16729 return ret;
16732 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
16733 vld2_dup_f64 (const float64_t * __a)
16735 float64x1x2_t ret;
16736 __builtin_aarch64_simd_oi __o;
16737 __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
16738 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
16739 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
16740 return ret;
16743 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
16744 vld2_dup_u8 (const uint8_t * __a)
16746 uint8x8x2_t ret;
16747 __builtin_aarch64_simd_oi __o;
16748 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16749 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16750 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16751 return ret;
16754 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
16755 vld2_dup_u16 (const uint16_t * __a)
16757 uint16x4x2_t ret;
16758 __builtin_aarch64_simd_oi __o;
16759 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16760 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16761 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16762 return ret;
16765 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
16766 vld2_dup_u32 (const uint32_t * __a)
16768 uint32x2x2_t ret;
16769 __builtin_aarch64_simd_oi __o;
16770 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16771 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16772 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16773 return ret;
16776 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
16777 vld2_dup_p8 (const poly8_t * __a)
16779 poly8x8x2_t ret;
16780 __builtin_aarch64_simd_oi __o;
16781 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16782 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16783 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16784 return ret;
16787 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
16788 vld2_dup_p16 (const poly16_t * __a)
16790 poly16x4x2_t ret;
16791 __builtin_aarch64_simd_oi __o;
16792 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16793 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16794 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16795 return ret;
16798 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
16799 vld2_dup_s64 (const int64_t * __a)
16801 int64x1x2_t ret;
16802 __builtin_aarch64_simd_oi __o;
16803 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16804 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16805 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16806 return ret;
16809 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
16810 vld2_dup_u64 (const uint64_t * __a)
16812 uint64x1x2_t ret;
16813 __builtin_aarch64_simd_oi __o;
16814 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16815 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16816 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16817 return ret;
16820 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
16821 vld2q_dup_s8 (const int8_t * __a)
16823 int8x16x2_t ret;
16824 __builtin_aarch64_simd_oi __o;
16825 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16826 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16827 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16828 return ret;
16831 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
16832 vld2q_dup_p8 (const poly8_t * __a)
16834 poly8x16x2_t ret;
16835 __builtin_aarch64_simd_oi __o;
16836 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16837 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16838 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16839 return ret;
16842 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
16843 vld2q_dup_s16 (const int16_t * __a)
16845 int16x8x2_t ret;
16846 __builtin_aarch64_simd_oi __o;
16847 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16848 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16849 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16850 return ret;
16853 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
16854 vld2q_dup_p16 (const poly16_t * __a)
16856 poly16x8x2_t ret;
16857 __builtin_aarch64_simd_oi __o;
16858 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16859 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16860 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16861 return ret;
16864 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
16865 vld2q_dup_s32 (const int32_t * __a)
16867 int32x4x2_t ret;
16868 __builtin_aarch64_simd_oi __o;
16869 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16870 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16871 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16872 return ret;
16875 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16876 vld2q_dup_s64 (const int64_t * __a)
16878 int64x2x2_t ret;
16879 __builtin_aarch64_simd_oi __o;
16880 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16881 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16882 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16883 return ret;
16886 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16887 vld2q_dup_u8 (const uint8_t * __a)
16889 uint8x16x2_t ret;
16890 __builtin_aarch64_simd_oi __o;
16891 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16892 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16893 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16894 return ret;
16897 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16898 vld2q_dup_u16 (const uint16_t * __a)
16900 uint16x8x2_t ret;
16901 __builtin_aarch64_simd_oi __o;
16902 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16903 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16904 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16905 return ret;
16908 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16909 vld2q_dup_u32 (const uint32_t * __a)
16911 uint32x4x2_t ret;
16912 __builtin_aarch64_simd_oi __o;
16913 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16914 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16915 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16916 return ret;
16919 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16920 vld2q_dup_u64 (const uint64_t * __a)
16922 uint64x2x2_t ret;
16923 __builtin_aarch64_simd_oi __o;
16924 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16925 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16926 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16927 return ret;
16930 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16931 vld2q_dup_f32 (const float32_t * __a)
16933 float32x4x2_t ret;
16934 __builtin_aarch64_simd_oi __o;
16935 __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16936 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16937 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16938 return ret;
16941 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16942 vld2q_dup_f64 (const float64_t * __a)
16944 float64x2x2_t ret;
16945 __builtin_aarch64_simd_oi __o;
16946 __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
16947 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16948 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16949 return ret;
/* vld3_dup: LD3R load-and-replicate forms returning three D registers.
   Each wrapper issues the LD3R via a builtin that yields an opaque
   three-vector (CI) value, then unpacks the three D registers, casting
   from the builtin's canonical element type to the declared one.
   The float64x1_t results use a braced initializer because float64x1_t
   is a one-element vector built from a scalar double.  */
__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_s64 (const int64_t * __a)
{
  int64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_u64 (const uint64_t * __a)
{
  uint64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
  return ret;
}

__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
vld3_dup_f64 (const float64_t * __a)
{
  float64x1x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
  return ret;
}

__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
vld3_dup_s8 (const int8_t * __a)
{
  int8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
vld3_dup_p8 (const poly8_t * __a)
{
  poly8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
vld3_dup_s16 (const int16_t * __a)
{
  int16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
vld3_dup_p16 (const poly16_t * __a)
{
  poly16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_s32 (const int32_t * __a)
{
  int32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
vld3_dup_u8 (const uint8_t * __a)
{
  uint8x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
vld3_dup_u16 (const uint16_t * __a)
{
  uint16x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_u32 (const uint32_t * __a)
{
  uint32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
  return ret;
}

__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
vld3_dup_f32 (const float32_t * __a)
{
  float32x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
  return ret;
}
/* vld3q_dup: LD3R load-and-replicate forms returning three full Q
   registers.  Same pattern as vld3_dup but on 128-bit vector modes,
   unpacked with the get_qregci* accessors.  */
__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
vld3q_dup_s8 (const int8_t * __a)
{
  int8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
vld3q_dup_p8 (const poly8_t * __a)
{
  poly8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
vld3q_dup_s16 (const int16_t * __a)
{
  int16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
vld3q_dup_p16 (const poly16_t * __a)
{
  poly16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_s32 (const int32_t * __a)
{
  int32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
vld3q_dup_s64 (const int64_t * __a)
{
  int64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
vld3q_dup_u8 (const uint8_t * __a)
{
  uint8x16x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
  return ret;
}

__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
vld3q_dup_u16 (const uint16_t * __a)
{
  uint16x8x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
  return ret;
}

__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_u32 (const uint32_t * __a)
{
  uint32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
  return ret;
}

__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
vld3q_dup_u64 (const uint64_t * __a)
{
  uint64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
  return ret;
}

__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
vld3q_dup_f32 (const float32_t * __a)
{
  float32x4x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
  return ret;
}

__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
vld3q_dup_f64 (const float64_t * __a)
{
  float64x2x3_t ret;
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
  return ret;
}
/* vld4_dup: LD4R load-and-replicate forms returning four D registers.
   The builtin yields an opaque four-vector (XI) value, unpacked with the
   get_dregxi* accessors.  float64x1_t results use a braced initializer
   because float64x1_t is a one-element vector built from a scalar.  */
__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_s64 (const int64_t * __a)
{
  int64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_u64 (const uint64_t * __a)
{
  uint64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
  return ret;
}

__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
vld4_dup_f64 (const float64_t * __a)
{
  float64x1x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
  return ret;
}

__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
vld4_dup_s8 (const int8_t * __a)
{
  int8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
vld4_dup_p8 (const poly8_t * __a)
{
  poly8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_s16 (const int16_t * __a)
{
  int16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_p16 (const poly16_t * __a)
{
  poly16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_s32 (const int32_t * __a)
{
  int32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
vld4_dup_u8 (const uint8_t * __a)
{
  uint8x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
vld4_dup_u16 (const uint16_t * __a)
{
  uint16x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_u32 (const uint32_t * __a)
{
  uint32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
  return ret;
}

__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
vld4_dup_f32 (const float32_t * __a)
{
  float32x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
  return ret;
}
/* vld4q_dup: LD4R load-and-replicate forms returning four full Q
   registers.  Same pattern as vld4_dup but on 128-bit vector modes,
   unpacked with the get_qregxi* accessors.  */
__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_s8 (const int8_t * __a)
{
  int8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_p8 (const poly8_t * __a)
{
  poly8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_s16 (const int16_t * __a)
{
  int16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_p16 (const poly16_t * __a)
{
  poly16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_s32 (const int32_t * __a)
{
  int32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_s64 (const int64_t * __a)
{
  int64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
vld4q_dup_u8 (const uint8_t * __a)
{
  uint8x16x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
  return ret;
}

__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
vld4q_dup_u16 (const uint16_t * __a)
{
  uint16x8x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
  return ret;
}

__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_u32 (const uint32_t * __a)
{
  uint32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
  return ret;
}

__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_u64 (const uint64_t * __a)
{
  uint64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
  return ret;
}

__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
vld4q_dup_f32 (const float32_t * __a)
{
  float32x4x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
  return ret;
}

__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
vld4q_dup_f64 (const float64_t * __a)
{
  float64x2x4_t ret;
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
  return ret;
}
/* vld2_lane */

/* Generate the D-register vld2_lane_<suffix> intrinsics.  Each one widens
   the two D-register inputs to Q registers (upper half zeroed via
   vcombine/vcreate, which the LD2 lane builtin requires), packs them into
   an opaque OI value, performs the single-lane LD2, and unpacks the low
   D registers of the result.  `signedtype' is the canonical Q-register
   type the set_qregoi builtin expects.  */
#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_oi __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_ld2_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1);	   \
  return __b;								   \
}

__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di,
		 u64, int64x2_t)

/* Retire the D-register generator so the name can be reused below for the
   Q-register variants.  */
#undef __LD2_LANE_FUNC
/* vld2q_lane */

/* Generate the Q-register vld2q_lane_<suffix> intrinsics.  The inputs are
   already full Q registers, so they are packed directly into the opaque OI
   value (bit-cast through int32x4_t, the packing builtin's canonical
   type), the single-lane LD2 is performed, and the two Q registers are
   unpacked and cast back to the declared element type.  */
#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_oi __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_ld2_lane##mode (				   \
	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1);	   \
  return ret;								   \
}

__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD2_LANE_FUNC
/* vld3_lane */

/* Generate the D-register vld3_lane_<suffix> intrinsics.  Same scheme as
   the D-register vld2_lane generator, extended to three vectors packed
   into an opaque CI value.  */
#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_ci __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __temp.val[2] =							   \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[2],	   \
					    2);				   \
  __o = __builtin_aarch64_ld3_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);	   \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);	   \
  return __b;								   \
}

__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di,
		 u64, int64x2_t)

/* Retire the D-register generator so the name can be reused below for the
   Q-register variants.  */
#undef __LD3_LANE_FUNC
/* vld3q_lane */

/* Load one 3-element structure from __PTR into lane __C of the three
   128-bit vectors in __B.  The Q-register inputs are packed directly
   into a CI register tuple (cast through int32x4_t, the type the
   generic set/get_qregciv4si builtins take), the lane load runs on the
   tuple, and the results are unpacked into RET.  */
#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__)) \
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{ \
  __builtin_aarch64_simd_ci __o; \
  intype ret; \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_ld3_lane##mode ( \
	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \
  return ret; \
}

__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD3_LANE_FUNC
/* vld4_lane */

/* Load one 4-element structure from __PTR into lane __C of the four
   64-bit vectors in __B.  Same widen/pack/load/narrow scheme as
   __LD3_LANE_FUNC above, but over an XI (4 x Q) register tuple.  */
#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, \
			mode, ptrmode, funcsuffix, signedtype) \
__extension__ static __inline intype __attribute__ ((__always_inline__)) \
vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{ \
  __builtin_aarch64_simd_xi __o; \
  largetype __temp; \
  __temp.val[0] = \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
  __temp.val[1] = \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
  __temp.val[2] = \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
  __temp.val[3] = \
    vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \
  __o = __builtin_aarch64_set_qregxi##mode (__o, \
					    (signedtype) __temp.val[0], \
					    0); \
  __o = __builtin_aarch64_set_qregxi##mode (__o, \
					    (signedtype) __temp.val[1], \
					    1); \
  __o = __builtin_aarch64_set_qregxi##mode (__o, \
					    (signedtype) __temp.val[2], \
					    2); \
  __o = __builtin_aarch64_set_qregxi##mode (__o, \
					    (signedtype) __temp.val[3], \
					    3); \
  __o = __builtin_aarch64_ld4_lane##mode ( \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \
  __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \
  return __b; \
}

/* vld4_lane instantiations (D-register forms).  */

__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD4_LANE_FUNC
/* vld4q_lane */

/* Load one 4-element structure from __PTR into lane __C of the four
   128-bit vectors in __B.  Q-register analogue of vld4_lane: pack the
   four inputs into an XI register tuple (cast through int32x4_t for
   the generic set/get_qregxiv4si builtins), lane-load, unpack.  */
#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__)) \
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{ \
  __builtin_aarch64_simd_xi __o; \
  intype ret; \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
  __o = __builtin_aarch64_ld4_lane##mode ( \
	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \
  ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \
  return ret; \
}

__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD4_LANE_FUNC
/* vmax: element-wise maximum.  The float variants use the `_nan'
   builtins (NaN-propagating FMAX form); the unsigned variants cast
   through the signed vector types the umax builtins are declared
   with.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmax_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smax_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmax_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_smaxv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmax_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_smaxv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmax_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_smaxv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmax_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmax_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmax_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smax_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smax_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmaxq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_smaxv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmaxq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_smaxv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmaxq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_smaxv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
/* vmaxnm: maxNum maximum.  Uses the plain smax builtins (as opposed
   to the `_nan' variants used by vmax) — the FMAXNM form.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smaxv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smaxv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smaxv2df (__a, __b);
}
/* vmaxv: across-vector maximum reduction, returning a scalar.  The
   float variants use the NaN-propagating reduction builtins; the
   unsigned variants use the `_uu' (unsigned-in/unsigned-out)
   builtins, so no casts are needed.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxv_u8 (uint8x8_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxv_u16 (uint16x4_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxv_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vmaxvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vmaxvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vmaxvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vmaxvq_u8 (uint8x16_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vmaxvq_u16 (uint16x8_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vmaxvq_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
}
/* vmaxnmv: across-vector maxNum reduction (non-NaN-propagating
   reduction builtins), returning a scalar.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmaxnmvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmaxnmvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smax_scal_v2df (__a);
}
/* vmin: element-wise minimum.  Mirror image of vmax: `_nan' builtins
   for float, casts through signed vector types for the unsigned
   umin builtins.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmin_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_smin_nanv2sf (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmin_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sminv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmin_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sminv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmin_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sminv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmin_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmin_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmin_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_smin_nanv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_smin_nanv2df (__a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vminq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sminv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vminq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sminv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vminq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sminv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vminq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vminq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vminq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}
/* vminnm: minNum minimum.  Uses the plain smin builtins (the FMINNM
   form), as opposed to the `_nan' variants used by vmin.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vminnm_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_sminv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_sminv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminnmq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_sminv2df (__a, __b);
}
/* vminv: across-vector minimum reduction, returning a scalar.
   NaN-propagating reduction builtins for float; `_uu' builtins for
   the unsigned variants.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminv_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminv_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminv_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminv_u8 (uint8x8_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminv_u16 (uint16x4_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminv_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vminvq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vminvq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vminvq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4si (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vminvq_u8 (uint8x16_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vminvq_u16 (uint16x8_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vminvq_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
}
/* vminnmv: across-vector minNum reduction (non-NaN-propagating
   reduction builtins), returning a scalar.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmv_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vminnmvq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vminnmvq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_reduc_smin_scal_v2df (__a);
}
18293 /* vmla */
18295 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18296 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18298 return a + b * c;
18301 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18302 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18304 return __a + __b * __c;
18307 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18308 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18310 return a + b * c;
18313 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18314 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18316 return a + b * c;
/* vmla_lane: multiply __b by lane __lane of __c (a 64-bit vector),
   accumulate into __a.  __aarch64_vget_lane_any extracts the scalar
   lane and the multiply broadcasts it.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
	       int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
	       int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
	       uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmla_laneq: as vmla_lane, but the lane source __c is a 128-bit
   vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
		float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmlaq_lane: Q-register accumulate/multiplicand, with the lane taken
   from a 64-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmlaq_laneq: Q-register accumulate/multiplicand with the lane taken
   from a 128-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
		 float32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
		 int16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
		 int32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
		 uint16x8_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
		 uint32x4_t __c, const int __lane)
{
  return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
}
18467 /* vmls */
18469 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18470 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18472 return a - b * c;
18475 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18476 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18478 return __a - __b * __c;
18481 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18482 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18484 return a - b * c;
18487 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18488 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18490 return a - b * c;
/* vmls_lane: multiply __b by lane __lane of __c (a 64-bit vector),
   subtract from __a.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
	       float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
	       int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
	       int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
	       uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
	       uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmls_laneq: as vmls_lane, but the lane source __c is a 128-bit
   vector.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
		float32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
		int16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
		int32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
		uint16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
		uint32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmlsq_lane: Q-register accumulate/multiplicand, with the lane taken
   from a 64-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
		float32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
		int16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
		int32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
		uint16x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
		uint32x2_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}
/* vmlsq_laneq: Q-register accumulate/multiplicand with the lane taken
   from a 128-bit vector __c.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
		 float32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
		 int16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
		 int32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
		 uint16x8_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
		 uint32x4_t __c, const int __lane)
{
  return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}
18640 /* vmov_n_ */
18642 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18643 vmov_n_f32 (float32_t __a)
18645 return vdup_n_f32 (__a);
18648 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18649 vmov_n_f64 (float64_t __a)
18651 return (float64x1_t) {__a};
18654 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18655 vmov_n_p8 (poly8_t __a)
18657 return vdup_n_p8 (__a);
18660 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18661 vmov_n_p16 (poly16_t __a)
18663 return vdup_n_p16 (__a);
18666 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18667 vmov_n_s8 (int8_t __a)
18669 return vdup_n_s8 (__a);
18672 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18673 vmov_n_s16 (int16_t __a)
18675 return vdup_n_s16 (__a);
18678 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18679 vmov_n_s32 (int32_t __a)
18681 return vdup_n_s32 (__a);
18684 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18685 vmov_n_s64 (int64_t __a)
18687 return (int64x1_t) {__a};
18690 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18691 vmov_n_u8 (uint8_t __a)
18693 return vdup_n_u8 (__a);
18696 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18697 vmov_n_u16 (uint16_t __a)
18699 return vdup_n_u16 (__a);
18702 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18703 vmov_n_u32 (uint32_t __a)
18705 return vdup_n_u32 (__a);
18708 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18709 vmov_n_u64 (uint64_t __a)
18711 return (uint64x1_t) {__a};
18714 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18715 vmovq_n_f32 (float32_t __a)
18717 return vdupq_n_f32 (__a);
18720 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18721 vmovq_n_f64 (float64_t __a)
18723 return vdupq_n_f64 (__a);
18726 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18727 vmovq_n_p8 (poly8_t __a)
18729 return vdupq_n_p8 (__a);
18732 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18733 vmovq_n_p16 (poly16_t __a)
18735 return vdupq_n_p16 (__a);
18738 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18739 vmovq_n_s8 (int8_t __a)
18741 return vdupq_n_s8 (__a);
18744 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18745 vmovq_n_s16 (int16_t __a)
18747 return vdupq_n_s16 (__a);
18750 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18751 vmovq_n_s32 (int32_t __a)
18753 return vdupq_n_s32 (__a);
18756 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18757 vmovq_n_s64 (int64_t __a)
18759 return vdupq_n_s64 (__a);
18762 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18763 vmovq_n_u8 (uint8_t __a)
18765 return vdupq_n_u8 (__a);
18768 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18769 vmovq_n_u16 (uint16_t __a)
18771 return vdupq_n_u16 (__a);
18774 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18775 vmovq_n_u32 (uint32_t __a)
18777 return vdupq_n_u32 (__a);
18780 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18781 vmovq_n_u64 (uint64_t __a)
18783 return vdupq_n_u64 (__a);
18786 /* vmul_lane */
18788 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18789 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18791 return __a * __aarch64_vget_lane_any (__b, __lane);
18794 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18795 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18797 return __a * __b;
18800 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18801 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18803 return __a * __aarch64_vget_lane_any (__b, __lane);
18806 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18807 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18809 return __a * __aarch64_vget_lane_any (__b, __lane);
18812 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18813 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18815 return __a * __aarch64_vget_lane_any (__b, __lane);
18818 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18819 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18821 return __a * __aarch64_vget_lane_any (__b, __lane);
18824 /* vmuld_lane */
18826 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18827 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18829 return __a * __aarch64_vget_lane_any (__b, __lane);
18832 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18833 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18835 return __a * __aarch64_vget_lane_any (__b, __lane);
18838 /* vmuls_lane */
18840 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18841 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18843 return __a * __aarch64_vget_lane_any (__b, __lane);
18846 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18847 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18849 return __a * __aarch64_vget_lane_any (__b, __lane);
18852 /* vmul_laneq */
18854 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18855 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
18857 return __a * __aarch64_vget_lane_any (__b, __lane);
18860 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18861 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
18863 return __a * __aarch64_vget_lane_any (__b, __lane);
18866 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18867 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
18869 return __a * __aarch64_vget_lane_any (__b, __lane);
18872 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18873 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
18875 return __a * __aarch64_vget_lane_any (__b, __lane);
18878 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18879 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
18881 return __a * __aarch64_vget_lane_any (__b, __lane);
18884 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18885 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
18887 return __a * __aarch64_vget_lane_any (__b, __lane);
18890 /* vmul_n */
18892 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18893 vmul_n_f64 (float64x1_t __a, float64_t __b)
18895 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
18898 /* vmulq_lane */
18900 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18901 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
18903 return __a * __aarch64_vget_lane_any (__b, __lane);
18906 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18907 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
18909 __AARCH64_LANE_CHECK (__a, __lane);
18910 return __a * __b[0];
18913 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18914 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
18916 return __a * __aarch64_vget_lane_any (__b, __lane);
18919 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18920 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
18922 return __a * __aarch64_vget_lane_any (__b, __lane);
18925 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18926 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
18928 return __a * __aarch64_vget_lane_any (__b, __lane);
18931 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18932 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
18934 return __a * __aarch64_vget_lane_any (__b, __lane);
18937 /* vmulq_laneq */
18939 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18940 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
18942 return __a * __aarch64_vget_lane_any (__b, __lane);
18945 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18946 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
18948 return __a * __aarch64_vget_lane_any (__b, __lane);
18951 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18952 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
18954 return __a * __aarch64_vget_lane_any (__b, __lane);
18957 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18958 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
18960 return __a * __aarch64_vget_lane_any (__b, __lane);
18963 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18964 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
18966 return __a * __aarch64_vget_lane_any (__b, __lane);
18969 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18970 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
18972 return __a * __aarch64_vget_lane_any (__b, __lane);
18975 /* vneg */
18977 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18978 vneg_f32 (float32x2_t __a)
18980 return -__a;
18983 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18984 vneg_f64 (float64x1_t __a)
18986 return -__a;
18989 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18990 vneg_s8 (int8x8_t __a)
18992 return -__a;
18995 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18996 vneg_s16 (int16x4_t __a)
18998 return -__a;
19001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19002 vneg_s32 (int32x2_t __a)
19004 return -__a;
19007 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19008 vneg_s64 (int64x1_t __a)
19010 return -__a;
19013 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19014 vnegq_f32 (float32x4_t __a)
19016 return -__a;
19019 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19020 vnegq_f64 (float64x2_t __a)
19022 return -__a;
19025 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19026 vnegq_s8 (int8x16_t __a)
19028 return -__a;
19031 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19032 vnegq_s16 (int16x8_t __a)
19034 return -__a;
19037 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19038 vnegq_s32 (int32x4_t __a)
19040 return -__a;
19043 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19044 vnegq_s64 (int64x2_t __a)
19046 return -__a;
19049 /* vpadd */
19051 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19052 vpadd_s8 (int8x8_t __a, int8x8_t __b)
19054 return __builtin_aarch64_addpv8qi (__a, __b);
19057 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19058 vpadd_s16 (int16x4_t __a, int16x4_t __b)
19060 return __builtin_aarch64_addpv4hi (__a, __b);
19063 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19064 vpadd_s32 (int32x2_t __a, int32x2_t __b)
19066 return __builtin_aarch64_addpv2si (__a, __b);
19069 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19070 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
19072 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
19073 (int8x8_t) __b);
19076 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19077 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19079 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19080 (int16x4_t) __b);
19083 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19084 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19086 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19087 (int32x2_t) __b);
19090 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19091 vpaddd_f64 (float64x2_t __a)
19093 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
19096 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19097 vpaddd_s64 (int64x2_t __a)
19099 return __builtin_aarch64_addpdi (__a);
19102 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19103 vpaddd_u64 (uint64x2_t __a)
19105 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19108 /* vqabs */
19110 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19111 vqabsq_s64 (int64x2_t __a)
19113 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19116 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19117 vqabsb_s8 (int8_t __a)
19119 return (int8_t) __builtin_aarch64_sqabsqi (__a);
19122 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19123 vqabsh_s16 (int16_t __a)
19125 return (int16_t) __builtin_aarch64_sqabshi (__a);
19128 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19129 vqabss_s32 (int32_t __a)
19131 return (int32_t) __builtin_aarch64_sqabssi (__a);
19134 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19135 vqabsd_s64 (int64_t __a)
19137 return __builtin_aarch64_sqabsdi (__a);
19140 /* vqadd */
19142 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19143 vqaddb_s8 (int8_t __a, int8_t __b)
19145 return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
19148 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19149 vqaddh_s16 (int16_t __a, int16_t __b)
19151 return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
19154 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19155 vqadds_s32 (int32_t __a, int32_t __b)
19157 return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
19160 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19161 vqaddd_s64 (int64_t __a, int64_t __b)
19163 return __builtin_aarch64_sqadddi (__a, __b);
19166 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19167 vqaddb_u8 (uint8_t __a, uint8_t __b)
19169 return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19172 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19173 vqaddh_u16 (uint16_t __a, uint16_t __b)
19175 return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19178 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19179 vqadds_u32 (uint32_t __a, uint32_t __b)
19181 return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19184 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19185 vqaddd_u64 (uint64_t __a, uint64_t __b)
19187 return __builtin_aarch64_uqadddi_uuu (__a, __b);
19190 /* vqdmlal */
19192 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19193 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19195 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
19198 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19199 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19201 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
19204 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19205 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19206 int const __d)
19208 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
19211 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19212 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19213 int const __d)
19215 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
19218 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19219 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19221 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
19224 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19225 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19227 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
19230 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19231 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19233 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
19236 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19237 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19239 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
19242 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19243 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19245 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
19248 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19249 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19251 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
19254 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19255 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19256 int const __d)
19258 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
19261 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19262 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19263 int const __d)
19265 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
19268 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19269 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19271 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
19274 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19275 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19277 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
19280 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19281 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19283 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
19286 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19287 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19289 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
19292 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19293 vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
19295 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
19298 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19299 vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19301 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
19304 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19305 vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19307 return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
19310 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19311 vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
19313 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
19316 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19317 vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19319 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
19322 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19323 vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19325 return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
19328 /* vqdmlsl */
19330 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19331 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19333 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
19336 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19337 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19339 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
19342 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19343 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19344 int const __d)
19346 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
19349 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19350 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19351 int const __d)
19353 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
19356 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19357 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19359 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
19362 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19363 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19365 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
19368 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19369 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19371 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
19374 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19375 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19377 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
19380 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19381 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19383 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
19386 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19387 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19389 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
19392 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19393 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19394 int const __d)
19396 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
19399 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19400 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19401 int const __d)
19403 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
19406 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19407 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19409 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
19412 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19413 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19415 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
19418 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19419 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19421 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
19424 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19425 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19427 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
19430 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19431 vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
19433 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
19436 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19437 vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19439 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
19442 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19443 vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19445 return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
19448 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19449 vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
19451 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
19454 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19455 vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19457 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
19460 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19461 vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19463 return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
19466 /* vqdmulh */
19468 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19469 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19471 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
19474 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19475 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19477 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
19480 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19481 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19483 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
19486 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19487 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19489 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
19492 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19493 vqdmulhh_s16 (int16_t __a, int16_t __b)
19495 return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
19498 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19499 vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19501 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
19504 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19505 vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19507 return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
19510 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19511 vqdmulhs_s32 (int32_t __a, int32_t __b)
19513 return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
19516 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19517 vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19519 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
19522 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19523 vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19525 return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
19528 /* vqdmull */
19530 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19531 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
19533 return __builtin_aarch64_sqdmullv4hi (__a, __b);
19536 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19537 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
19539 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
19542 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19543 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
19545 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
19548 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19549 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
19551 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
19554 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19555 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
19557 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
19560 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19561 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
19563 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
19566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19567 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
19569 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
19572 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19573 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
19575 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
19578 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19579 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
19581 return __builtin_aarch64_sqdmullv2si (__a, __b);
19584 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19585 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
19587 return __builtin_aarch64_sqdmull2v4si (__a, __b);
19590 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19591 vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
19593 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
19596 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19597 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
19599 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
19602 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19603 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
19605 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
19608 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19609 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
19611 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
19614 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19615 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
19617 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
19620 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19621 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
19623 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
19626 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19627 vqdmullh_s16 (int16_t __a, int16_t __b)
19629 return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
19632 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19633 vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19635 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
19638 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19639 vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19641 return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
19644 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19645 vqdmulls_s32 (int32_t __a, int32_t __b)
19647 return __builtin_aarch64_sqdmullsi (__a, __b);
19650 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19651 vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19653 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
19656 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19657 vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19659 return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
19662 /* vqmovn */
19664 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19665 vqmovn_s16 (int16x8_t __a)
19667 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
19670 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19671 vqmovn_s32 (int32x4_t __a)
19673 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
19676 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19677 vqmovn_s64 (int64x2_t __a)
19679 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
19682 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19683 vqmovn_u16 (uint16x8_t __a)
19685 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
19688 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19689 vqmovn_u32 (uint32x4_t __a)
19691 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
19694 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19695 vqmovn_u64 (uint64x2_t __a)
19697 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
19700 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19701 vqmovnh_s16 (int16_t __a)
19703 return (int8_t) __builtin_aarch64_sqmovnhi (__a);
19706 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19707 vqmovns_s32 (int32_t __a)
19709 return (int16_t) __builtin_aarch64_sqmovnsi (__a);
19712 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19713 vqmovnd_s64 (int64_t __a)
19715 return (int32_t) __builtin_aarch64_sqmovndi (__a);
19718 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19719 vqmovnh_u16 (uint16_t __a)
19721 return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
19724 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19725 vqmovns_u32 (uint32_t __a)
19727 return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
19730 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19731 vqmovnd_u64 (uint64_t __a)
19733 return (uint32_t) __builtin_aarch64_uqmovndi (__a);
19736 /* vqmovun */
19738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19739 vqmovun_s16 (int16x8_t __a)
19741 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
19744 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19745 vqmovun_s32 (int32x4_t __a)
19747 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
19750 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19751 vqmovun_s64 (int64x2_t __a)
19753 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
19756 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19757 vqmovunh_s16 (int16_t __a)
19759 return (int8_t) __builtin_aarch64_sqmovunhi (__a);
19762 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19763 vqmovuns_s32 (int32_t __a)
19765 return (int16_t) __builtin_aarch64_sqmovunsi (__a);
19768 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19769 vqmovund_s64 (int64_t __a)
19771 return (int32_t) __builtin_aarch64_sqmovundi (__a);
19774 /* vqneg */
19776 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19777 vqnegq_s64 (int64x2_t __a)
19779 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
19782 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19783 vqnegb_s8 (int8_t __a)
19785 return (int8_t) __builtin_aarch64_sqnegqi (__a);
19788 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19789 vqnegh_s16 (int16_t __a)
19791 return (int16_t) __builtin_aarch64_sqneghi (__a);
19794 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19795 vqnegs_s32 (int32_t __a)
19797 return (int32_t) __builtin_aarch64_sqnegsi (__a);
19800 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19801 vqnegd_s64 (int64_t __a)
19803 return __builtin_aarch64_sqnegdi (__a);
19806 /* vqrdmulh */
19808 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19809 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19811 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
19814 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19815 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19817 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
19820 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19821 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19823 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
19826 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19827 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19829 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
19832 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19833 vqrdmulhh_s16 (int16_t __a, int16_t __b)
19835 return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
19838 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19839 vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19841 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
19844 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19845 vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19847 return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
19850 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19851 vqrdmulhs_s32 (int32_t __a, int32_t __b)
19853 return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
19856 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19857 vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19859 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
19862 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19863 vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19865 return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
19868 /* vqrshl */
19870 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19871 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
19873 return __builtin_aarch64_sqrshlv8qi (__a, __b);
19876 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19877 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
19879 return __builtin_aarch64_sqrshlv4hi (__a, __b);
19882 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19883 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
19885 return __builtin_aarch64_sqrshlv2si (__a, __b);
19888 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19889 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
19891 return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
19894 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19895 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
19897 return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
19900 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19901 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
19903 return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
19906 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19907 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
19909 return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
19912 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19913 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
19915 return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
19918 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19919 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
19921 return __builtin_aarch64_sqrshlv16qi (__a, __b);
19924 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19925 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
19927 return __builtin_aarch64_sqrshlv8hi (__a, __b);
19930 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19931 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
19933 return __builtin_aarch64_sqrshlv4si (__a, __b);
19936 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19937 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
19939 return __builtin_aarch64_sqrshlv2di (__a, __b);
19942 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19943 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
19945 return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
19948 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19949 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
19951 return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
19954 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19955 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
19957 return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
19960 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19961 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
19963 return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
19966 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19967 vqrshlb_s8 (int8_t __a, int8_t __b)
19969 return __builtin_aarch64_sqrshlqi (__a, __b);
19972 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19973 vqrshlh_s16 (int16_t __a, int16_t __b)
19975 return __builtin_aarch64_sqrshlhi (__a, __b);
19978 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19979 vqrshls_s32 (int32_t __a, int32_t __b)
19981 return __builtin_aarch64_sqrshlsi (__a, __b);
19984 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19985 vqrshld_s64 (int64_t __a, int64_t __b)
19987 return __builtin_aarch64_sqrshldi (__a, __b);
19990 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19991 vqrshlb_u8 (uint8_t __a, uint8_t __b)
19993 return __builtin_aarch64_uqrshlqi_uus (__a, __b);
19996 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19997 vqrshlh_u16 (uint16_t __a, uint16_t __b)
19999 return __builtin_aarch64_uqrshlhi_uus (__a, __b);
20002 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20003 vqrshls_u32 (uint32_t __a, uint32_t __b)
20005 return __builtin_aarch64_uqrshlsi_uus (__a, __b);
20008 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20009 vqrshld_u64 (uint64_t __a, uint64_t __b)
20011 return __builtin_aarch64_uqrshldi_uus (__a, __b);
20014 /* vqrshrn */
20016 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20017 vqrshrn_n_s16 (int16x8_t __a, const int __b)
20019 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
20022 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20023 vqrshrn_n_s32 (int32x4_t __a, const int __b)
20025 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
20028 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20029 vqrshrn_n_s64 (int64x2_t __a, const int __b)
20031 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
20034 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20035 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
20037 return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
20040 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20041 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
20043 return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
20046 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20047 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
20049 return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
20052 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20053 vqrshrnh_n_s16 (int16_t __a, const int __b)
20055 return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
20058 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20059 vqrshrns_n_s32 (int32_t __a, const int __b)
20061 return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
20064 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20065 vqrshrnd_n_s64 (int64_t __a, const int __b)
20067 return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
20070 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20071 vqrshrnh_n_u16 (uint16_t __a, const int __b)
20073 return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
20076 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20077 vqrshrns_n_u32 (uint32_t __a, const int __b)
20079 return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
20082 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20083 vqrshrnd_n_u64 (uint64_t __a, const int __b)
20085 return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
20088 /* vqrshrun */
20090 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20091 vqrshrun_n_s16 (int16x8_t __a, const int __b)
20093 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
20096 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20097 vqrshrun_n_s32 (int32x4_t __a, const int __b)
20099 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
20102 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20103 vqrshrun_n_s64 (int64x2_t __a, const int __b)
20105 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
20108 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20109 vqrshrunh_n_s16 (int16_t __a, const int __b)
20111 return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
20114 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20115 vqrshruns_n_s32 (int32_t __a, const int __b)
20117 return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
20120 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20121 vqrshrund_n_s64 (int64_t __a, const int __b)
20123 return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
20126 /* vqshl */
20128 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20129 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20131 return __builtin_aarch64_sqshlv8qi (__a, __b);
20134 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20135 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20137 return __builtin_aarch64_sqshlv4hi (__a, __b);
20140 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20141 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20143 return __builtin_aarch64_sqshlv2si (__a, __b);
20146 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20147 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20149 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20152 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20153 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20155 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20158 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20159 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20161 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20164 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20165 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20167 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20170 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20171 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20173 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20176 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20177 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20179 return __builtin_aarch64_sqshlv16qi (__a, __b);
20182 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20183 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20185 return __builtin_aarch64_sqshlv8hi (__a, __b);
20188 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20189 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20191 return __builtin_aarch64_sqshlv4si (__a, __b);
20194 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20195 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20197 return __builtin_aarch64_sqshlv2di (__a, __b);
20200 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20201 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20203 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20206 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20207 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20209 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20212 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20213 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20215 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20218 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20219 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20221 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20224 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20225 vqshlb_s8 (int8_t __a, int8_t __b)
20227 return __builtin_aarch64_sqshlqi (__a, __b);
20230 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20231 vqshlh_s16 (int16_t __a, int16_t __b)
20233 return __builtin_aarch64_sqshlhi (__a, __b);
20236 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20237 vqshls_s32 (int32_t __a, int32_t __b)
20239 return __builtin_aarch64_sqshlsi (__a, __b);
20242 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20243 vqshld_s64 (int64_t __a, int64_t __b)
20245 return __builtin_aarch64_sqshldi (__a, __b);
20248 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20249 vqshlb_u8 (uint8_t __a, uint8_t __b)
20251 return __builtin_aarch64_uqshlqi_uus (__a, __b);
20254 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20255 vqshlh_u16 (uint16_t __a, uint16_t __b)
20257 return __builtin_aarch64_uqshlhi_uus (__a, __b);
20260 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20261 vqshls_u32 (uint32_t __a, uint32_t __b)
20263 return __builtin_aarch64_uqshlsi_uus (__a, __b);
20266 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20267 vqshld_u64 (uint64_t __a, uint64_t __b)
20269 return __builtin_aarch64_uqshldi_uus (__a, __b);
20272 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20273 vqshl_n_s8 (int8x8_t __a, const int __b)
20275 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20278 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20279 vqshl_n_s16 (int16x4_t __a, const int __b)
20281 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20284 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20285 vqshl_n_s32 (int32x2_t __a, const int __b)
20287 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20290 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20291 vqshl_n_s64 (int64x1_t __a, const int __b)
20293 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20296 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20297 vqshl_n_u8 (uint8x8_t __a, const int __b)
20299 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20302 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20303 vqshl_n_u16 (uint16x4_t __a, const int __b)
20305 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20308 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20309 vqshl_n_u32 (uint32x2_t __a, const int __b)
20311 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
20314 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20315 vqshl_n_u64 (uint64x1_t __a, const int __b)
20317 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
20320 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20321 vqshlq_n_s8 (int8x16_t __a, const int __b)
20323 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
20326 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20327 vqshlq_n_s16 (int16x8_t __a, const int __b)
20329 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
20332 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20333 vqshlq_n_s32 (int32x4_t __a, const int __b)
20335 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
20338 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20339 vqshlq_n_s64 (int64x2_t __a, const int __b)
20341 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
20344 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20345 vqshlq_n_u8 (uint8x16_t __a, const int __b)
20347 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
20350 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20351 vqshlq_n_u16 (uint16x8_t __a, const int __b)
20353 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
20356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20357 vqshlq_n_u32 (uint32x4_t __a, const int __b)
20359 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
20362 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20363 vqshlq_n_u64 (uint64x2_t __a, const int __b)
20365 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
20368 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20369 vqshlb_n_s8 (int8_t __a, const int __b)
20371 return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
20374 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20375 vqshlh_n_s16 (int16_t __a, const int __b)
20377 return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
20380 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20381 vqshls_n_s32 (int32_t __a, const int __b)
20383 return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
20386 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20387 vqshld_n_s64 (int64_t __a, const int __b)
20389 return __builtin_aarch64_sqshl_ndi (__a, __b);
20392 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20393 vqshlb_n_u8 (uint8_t __a, const int __b)
20395 return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
20398 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20399 vqshlh_n_u16 (uint16_t __a, const int __b)
20401 return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
20404 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20405 vqshls_n_u32 (uint32_t __a, const int __b)
20407 return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
20410 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20411 vqshld_n_u64 (uint64_t __a, const int __b)
20413 return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
20416 /* vqshlu */
20418 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20419 vqshlu_n_s8 (int8x8_t __a, const int __b)
20421 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
20424 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20425 vqshlu_n_s16 (int16x4_t __a, const int __b)
20427 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
20430 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20431 vqshlu_n_s32 (int32x2_t __a, const int __b)
20433 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
20436 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20437 vqshlu_n_s64 (int64x1_t __a, const int __b)
20439 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
20442 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20443 vqshluq_n_s8 (int8x16_t __a, const int __b)
20445 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
20448 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20449 vqshluq_n_s16 (int16x8_t __a, const int __b)
20451 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
20454 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20455 vqshluq_n_s32 (int32x4_t __a, const int __b)
20457 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
20460 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20461 vqshluq_n_s64 (int64x2_t __a, const int __b)
20463 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
20466 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20467 vqshlub_n_s8 (int8_t __a, const int __b)
20469 return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
20472 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20473 vqshluh_n_s16 (int16_t __a, const int __b)
20475 return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
20478 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20479 vqshlus_n_s32 (int32_t __a, const int __b)
20481 return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
20484 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20485 vqshlud_n_s64 (int64_t __a, const int __b)
20487 return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
20490 /* vqshrn */
20492 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20493 vqshrn_n_s16 (int16x8_t __a, const int __b)
20495 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
20498 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20499 vqshrn_n_s32 (int32x4_t __a, const int __b)
20501 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
20504 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20505 vqshrn_n_s64 (int64x2_t __a, const int __b)
20507 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
20510 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20511 vqshrn_n_u16 (uint16x8_t __a, const int __b)
20513 return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
20516 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20517 vqshrn_n_u32 (uint32x4_t __a, const int __b)
20519 return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
20522 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20523 vqshrn_n_u64 (uint64x2_t __a, const int __b)
20525 return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
20528 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20529 vqshrnh_n_s16 (int16_t __a, const int __b)
20531 return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
20534 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20535 vqshrns_n_s32 (int32_t __a, const int __b)
20537 return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
20540 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20541 vqshrnd_n_s64 (int64_t __a, const int __b)
20543 return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
20546 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20547 vqshrnh_n_u16 (uint16_t __a, const int __b)
20549 return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
20552 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20553 vqshrns_n_u32 (uint32_t __a, const int __b)
20555 return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
20558 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20559 vqshrnd_n_u64 (uint64_t __a, const int __b)
20561 return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
20564 /* vqshrun */
20566 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20567 vqshrun_n_s16 (int16x8_t __a, const int __b)
20569 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
20572 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20573 vqshrun_n_s32 (int32x4_t __a, const int __b)
20575 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
20578 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20579 vqshrun_n_s64 (int64x2_t __a, const int __b)
20581 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
20584 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20585 vqshrunh_n_s16 (int16_t __a, const int __b)
20587 return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
20590 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20591 vqshruns_n_s32 (int32_t __a, const int __b)
20593 return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
20596 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20597 vqshrund_n_s64 (int64_t __a, const int __b)
20599 return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
20602 /* vqsub */
20604 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20605 vqsubb_s8 (int8_t __a, int8_t __b)
20607 return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
20610 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20611 vqsubh_s16 (int16_t __a, int16_t __b)
20613 return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
20616 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20617 vqsubs_s32 (int32_t __a, int32_t __b)
20619 return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
20622 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20623 vqsubd_s64 (int64_t __a, int64_t __b)
20625 return __builtin_aarch64_sqsubdi (__a, __b);
20628 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20629 vqsubb_u8 (uint8_t __a, uint8_t __b)
20631 return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
20634 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20635 vqsubh_u16 (uint16_t __a, uint16_t __b)
20637 return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
20640 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20641 vqsubs_u32 (uint32_t __a, uint32_t __b)
20643 return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
20646 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20647 vqsubd_u64 (uint64_t __a, uint64_t __b)
20649 return __builtin_aarch64_uqsubdi_uuu (__a, __b);
20652 /* vrbit */
20654 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20655 vrbit_p8 (poly8x8_t __a)
20657 return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20661 vrbit_s8 (int8x8_t __a)
20663 return __builtin_aarch64_rbitv8qi (__a);
20666 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20667 vrbit_u8 (uint8x8_t __a)
20669 return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20672 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20673 vrbitq_p8 (poly8x16_t __a)
20675 return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
20678 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20679 vrbitq_s8 (int8x16_t __a)
20681 return __builtin_aarch64_rbitv16qi (__a);
20684 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20685 vrbitq_u8 (uint8x16_t __a)
20687 return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
20690 /* vrecpe */
20692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20693 vrecpe_u32 (uint32x2_t __a)
20695 return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a);
20698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20699 vrecpeq_u32 (uint32x4_t __a)
20701 return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a);
20704 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20705 vrecpes_f32 (float32_t __a)
20707 return __builtin_aarch64_frecpesf (__a);
20710 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20711 vrecped_f64 (float64_t __a)
20713 return __builtin_aarch64_frecpedf (__a);
20716 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20717 vrecpe_f32 (float32x2_t __a)
20719 return __builtin_aarch64_frecpev2sf (__a);
20722 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20723 vrecpeq_f32 (float32x4_t __a)
20725 return __builtin_aarch64_frecpev4sf (__a);
20728 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20729 vrecpeq_f64 (float64x2_t __a)
20731 return __builtin_aarch64_frecpev2df (__a);
20734 /* vrecps */
20736 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20737 vrecpss_f32 (float32_t __a, float32_t __b)
20739 return __builtin_aarch64_frecpssf (__a, __b);
20742 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20743 vrecpsd_f64 (float64_t __a, float64_t __b)
20745 return __builtin_aarch64_frecpsdf (__a, __b);
20748 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20749 vrecps_f32 (float32x2_t __a, float32x2_t __b)
20751 return __builtin_aarch64_frecpsv2sf (__a, __b);
20754 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20755 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
20757 return __builtin_aarch64_frecpsv4sf (__a, __b);
20760 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20761 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
20763 return __builtin_aarch64_frecpsv2df (__a, __b);
20766 /* vrecpx */
20768 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20769 vrecpxs_f32 (float32_t __a)
20771 return __builtin_aarch64_frecpxsf (__a);
20774 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20775 vrecpxd_f64 (float64_t __a)
20777 return __builtin_aarch64_frecpxdf (__a);
20781 /* vrev */
20783 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20784 vrev16_p8 (poly8x8_t a)
20786 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20789 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20790 vrev16_s8 (int8x8_t a)
20792 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20795 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20796 vrev16_u8 (uint8x8_t a)
20798 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20801 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20802 vrev16q_p8 (poly8x16_t a)
20804 return __builtin_shuffle (a,
20805 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20809 vrev16q_s8 (int8x16_t a)
20811 return __builtin_shuffle (a,
20812 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20815 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20816 vrev16q_u8 (uint8x16_t a)
20818 return __builtin_shuffle (a,
20819 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20822 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20823 vrev32_p8 (poly8x8_t a)
20825 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20828 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20829 vrev32_p16 (poly16x4_t a)
20831 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20834 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20835 vrev32_s8 (int8x8_t a)
20837 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20840 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20841 vrev32_s16 (int16x4_t a)
20843 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20846 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20847 vrev32_u8 (uint8x8_t a)
20849 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20852 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20853 vrev32_u16 (uint16x4_t a)
20855 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20858 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20859 vrev32q_p8 (poly8x16_t a)
20861 return __builtin_shuffle (a,
20862 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20865 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20866 vrev32q_p16 (poly16x8_t a)
20868 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20871 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20872 vrev32q_s8 (int8x16_t a)
20874 return __builtin_shuffle (a,
20875 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20878 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20879 vrev32q_s16 (int16x8_t a)
20881 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20884 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20885 vrev32q_u8 (uint8x16_t a)
20887 return __builtin_shuffle (a,
20888 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20891 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20892 vrev32q_u16 (uint16x8_t a)
20894 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20897 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20898 vrev64_f32 (float32x2_t a)
20900 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20903 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20904 vrev64_p8 (poly8x8_t a)
20906 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20909 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20910 vrev64_p16 (poly16x4_t a)
20912 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20915 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20916 vrev64_s8 (int8x8_t a)
20918 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20921 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20922 vrev64_s16 (int16x4_t a)
20924 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20927 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20928 vrev64_s32 (int32x2_t a)
20930 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20934 vrev64_u8 (uint8x8_t a)
20936 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20940 vrev64_u16 (uint16x4_t a)
20942 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20945 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20946 vrev64_u32 (uint32x2_t a)
20948 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20951 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20952 vrev64q_f32 (float32x4_t a)
20954 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
20957 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20958 vrev64q_p8 (poly8x16_t a)
20960 return __builtin_shuffle (a,
20961 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20964 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20965 vrev64q_p16 (poly16x8_t a)
20967 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20970 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20971 vrev64q_s8 (int8x16_t a)
20973 return __builtin_shuffle (a,
20974 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20977 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20978 vrev64q_s16 (int16x8_t a)
20980 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20983 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20984 vrev64q_s32 (int32x4_t a)
20986 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
20989 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20990 vrev64q_u8 (uint8x16_t a)
20992 return __builtin_shuffle (a,
20993 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20996 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20997 vrev64q_u16 (uint16x8_t a)
20999 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21002 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21003 vrev64q_u32 (uint32x4_t a)
21005 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21008 /* vrnd */
21010 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21011 vrnd_f32 (float32x2_t __a)
21013 return __builtin_aarch64_btruncv2sf (__a);
21016 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21017 vrnd_f64 (float64x1_t __a)
21019 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21022 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21023 vrndq_f32 (float32x4_t __a)
21025 return __builtin_aarch64_btruncv4sf (__a);
21028 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21029 vrndq_f64 (float64x2_t __a)
21031 return __builtin_aarch64_btruncv2df (__a);
21034 /* vrnda */
21036 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21037 vrnda_f32 (float32x2_t __a)
21039 return __builtin_aarch64_roundv2sf (__a);
21042 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21043 vrnda_f64 (float64x1_t __a)
21045 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21048 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21049 vrndaq_f32 (float32x4_t __a)
21051 return __builtin_aarch64_roundv4sf (__a);
21054 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21055 vrndaq_f64 (float64x2_t __a)
21057 return __builtin_aarch64_roundv2df (__a);
21060 /* vrndi */
21062 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21063 vrndi_f32 (float32x2_t __a)
21065 return __builtin_aarch64_nearbyintv2sf (__a);
21068 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21069 vrndi_f64 (float64x1_t __a)
21071 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21074 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21075 vrndiq_f32 (float32x4_t __a)
21077 return __builtin_aarch64_nearbyintv4sf (__a);
21080 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21081 vrndiq_f64 (float64x2_t __a)
21083 return __builtin_aarch64_nearbyintv2df (__a);
21086 /* vrndm */
21088 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21089 vrndm_f32 (float32x2_t __a)
21091 return __builtin_aarch64_floorv2sf (__a);
21094 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21095 vrndm_f64 (float64x1_t __a)
21097 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21100 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21101 vrndmq_f32 (float32x4_t __a)
21103 return __builtin_aarch64_floorv4sf (__a);
21106 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21107 vrndmq_f64 (float64x2_t __a)
21109 return __builtin_aarch64_floorv2df (__a);
21112 /* vrndn */
21114 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21115 vrndn_f32 (float32x2_t __a)
21117 return __builtin_aarch64_frintnv2sf (__a);
21120 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21121 vrndn_f64 (float64x1_t __a)
21123 return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
21126 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21127 vrndnq_f32 (float32x4_t __a)
21129 return __builtin_aarch64_frintnv4sf (__a);
21132 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21133 vrndnq_f64 (float64x2_t __a)
21135 return __builtin_aarch64_frintnv2df (__a);
21138 /* vrndp */
21140 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21141 vrndp_f32 (float32x2_t __a)
21143 return __builtin_aarch64_ceilv2sf (__a);
21146 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21147 vrndp_f64 (float64x1_t __a)
21149 return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
21152 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21153 vrndpq_f32 (float32x4_t __a)
21155 return __builtin_aarch64_ceilv4sf (__a);
21158 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21159 vrndpq_f64 (float64x2_t __a)
21161 return __builtin_aarch64_ceilv2df (__a);
21164 /* vrndx */
21166 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21167 vrndx_f32 (float32x2_t __a)
21169 return __builtin_aarch64_rintv2sf (__a);
21172 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21173 vrndx_f64 (float64x1_t __a)
21175 return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
21178 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21179 vrndxq_f32 (float32x4_t __a)
21181 return __builtin_aarch64_rintv4sf (__a);
21184 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21185 vrndxq_f64 (float64x2_t __a)
21187 return __builtin_aarch64_rintv2df (__a);
21190 /* vrshl */
21192 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21193 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21195 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21198 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21199 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21201 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21204 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21205 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21207 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21210 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21211 vrshl_s64 (int64x1_t __a, int64x1_t __b)
21213 return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
21216 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21217 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
21219 return __builtin_aarch64_urshlv8qi_uus (__a, __b);
21222 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21223 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
21225 return __builtin_aarch64_urshlv4hi_uus (__a, __b);
21228 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21229 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
21231 return __builtin_aarch64_urshlv2si_uus (__a, __b);
21234 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21235 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
21237 return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
21240 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21241 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
21243 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
21246 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21247 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
21249 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
21252 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21253 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
21255 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
21258 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21259 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
21261 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
21264 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21265 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21267 return __builtin_aarch64_urshlv16qi_uus (__a, __b);
21270 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21271 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21273 return __builtin_aarch64_urshlv8hi_uus (__a, __b);
21276 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21277 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21279 return __builtin_aarch64_urshlv4si_uus (__a, __b);
21282 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21283 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21285 return __builtin_aarch64_urshlv2di_uus (__a, __b);
21288 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21289 vrshld_s64 (int64_t __a, int64_t __b)
21291 return __builtin_aarch64_srshldi (__a, __b);
21294 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21295 vrshld_u64 (uint64_t __a, int64_t __b)
21297 return __builtin_aarch64_urshldi_uus (__a, __b);
21300 /* vrshr */
21302 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21303 vrshr_n_s8 (int8x8_t __a, const int __b)
21305 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
21308 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21309 vrshr_n_s16 (int16x4_t __a, const int __b)
21311 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
21314 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21315 vrshr_n_s32 (int32x2_t __a, const int __b)
21317 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
21320 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21321 vrshr_n_s64 (int64x1_t __a, const int __b)
21323 return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
21326 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21327 vrshr_n_u8 (uint8x8_t __a, const int __b)
21329 return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
21332 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21333 vrshr_n_u16 (uint16x4_t __a, const int __b)
21335 return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
21338 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21339 vrshr_n_u32 (uint32x2_t __a, const int __b)
21341 return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
21344 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21345 vrshr_n_u64 (uint64x1_t __a, const int __b)
21347 return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
21350 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21351 vrshrq_n_s8 (int8x16_t __a, const int __b)
21353 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
21356 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21357 vrshrq_n_s16 (int16x8_t __a, const int __b)
21359 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
21362 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21363 vrshrq_n_s32 (int32x4_t __a, const int __b)
21365 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
21368 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21369 vrshrq_n_s64 (int64x2_t __a, const int __b)
21371 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
21374 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21375 vrshrq_n_u8 (uint8x16_t __a, const int __b)
21377 return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
21380 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21381 vrshrq_n_u16 (uint16x8_t __a, const int __b)
21383 return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
21386 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21387 vrshrq_n_u32 (uint32x4_t __a, const int __b)
21389 return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
21392 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21393 vrshrq_n_u64 (uint64x2_t __a, const int __b)
21395 return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
21398 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21399 vrshrd_n_s64 (int64_t __a, const int __b)
21401 return __builtin_aarch64_srshr_ndi (__a, __b);
21404 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21405 vrshrd_n_u64 (uint64_t __a, const int __b)
21407 return __builtin_aarch64_urshr_ndi_uus (__a, __b);
21410 /* vrsra */
21412 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21413 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
21415 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
21418 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21419 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21421 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
21424 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21425 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21427 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
21430 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21431 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
21433 return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
21436 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21437 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
21439 return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
21442 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21443 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
21445 return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
21448 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21449 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
21451 return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
21454 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21455 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
21457 return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
21460 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21461 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
21463 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
21466 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21467 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
21469 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
21472 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21473 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
21475 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
21478 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21479 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
21481 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
21484 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21485 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
21487 return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
21490 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21491 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
21493 return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
21496 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21497 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
21499 return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
21502 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21503 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
21505 return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
21508 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21509 vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
21511 return __builtin_aarch64_srsra_ndi (__a, __b, __c);
21514 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21515 vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
21517 return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* SHA1/SHA256 hash-update and schedule-update intrinsics plus 64x64->128
   polynomial multiply; only available when the crypto extension is
   enabled.  */

/* vsha1 */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
21597 /* vshl */
21599 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21600 vshl_n_s8 (int8x8_t __a, const int __b)
21602 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
21605 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21606 vshl_n_s16 (int16x4_t __a, const int __b)
21608 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
21611 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21612 vshl_n_s32 (int32x2_t __a, const int __b)
21614 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
21617 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21618 vshl_n_s64 (int64x1_t __a, const int __b)
21620 return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
21623 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21624 vshl_n_u8 (uint8x8_t __a, const int __b)
21626 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
21629 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21630 vshl_n_u16 (uint16x4_t __a, const int __b)
21632 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
21635 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21636 vshl_n_u32 (uint32x2_t __a, const int __b)
21638 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
21641 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21642 vshl_n_u64 (uint64x1_t __a, const int __b)
21644 return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
21647 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21648 vshlq_n_s8 (int8x16_t __a, const int __b)
21650 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
21653 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21654 vshlq_n_s16 (int16x8_t __a, const int __b)
21656 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
21659 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21660 vshlq_n_s32 (int32x4_t __a, const int __b)
21662 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
21665 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21666 vshlq_n_s64 (int64x2_t __a, const int __b)
21668 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
21671 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21672 vshlq_n_u8 (uint8x16_t __a, const int __b)
21674 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
21677 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21678 vshlq_n_u16 (uint16x8_t __a, const int __b)
21680 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
21683 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21684 vshlq_n_u32 (uint32x4_t __a, const int __b)
21686 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
21689 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21690 vshlq_n_u64 (uint64x2_t __a, const int __b)
21692 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
21695 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21696 vshld_n_s64 (int64_t __a, const int __b)
21698 return __builtin_aarch64_ashldi (__a, __b);
21701 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21702 vshld_n_u64 (uint64_t __a, const int __b)
21704 return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
21707 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21708 vshl_s8 (int8x8_t __a, int8x8_t __b)
21710 return __builtin_aarch64_sshlv8qi (__a, __b);
21713 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21714 vshl_s16 (int16x4_t __a, int16x4_t __b)
21716 return __builtin_aarch64_sshlv4hi (__a, __b);
21719 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21720 vshl_s32 (int32x2_t __a, int32x2_t __b)
21722 return __builtin_aarch64_sshlv2si (__a, __b);
21725 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21726 vshl_s64 (int64x1_t __a, int64x1_t __b)
21728 return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
21731 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21732 vshl_u8 (uint8x8_t __a, int8x8_t __b)
21734 return __builtin_aarch64_ushlv8qi_uus (__a, __b);
21737 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21738 vshl_u16 (uint16x4_t __a, int16x4_t __b)
21740 return __builtin_aarch64_ushlv4hi_uus (__a, __b);
21743 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21744 vshl_u32 (uint32x2_t __a, int32x2_t __b)
21746 return __builtin_aarch64_ushlv2si_uus (__a, __b);
21749 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21750 vshl_u64 (uint64x1_t __a, int64x1_t __b)
21752 return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
21755 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21756 vshlq_s8 (int8x16_t __a, int8x16_t __b)
21758 return __builtin_aarch64_sshlv16qi (__a, __b);
21761 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21762 vshlq_s16 (int16x8_t __a, int16x8_t __b)
21764 return __builtin_aarch64_sshlv8hi (__a, __b);
21767 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21768 vshlq_s32 (int32x4_t __a, int32x4_t __b)
21770 return __builtin_aarch64_sshlv4si (__a, __b);
21773 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21774 vshlq_s64 (int64x2_t __a, int64x2_t __b)
21776 return __builtin_aarch64_sshlv2di (__a, __b);
21779 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21780 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
21782 return __builtin_aarch64_ushlv16qi_uus (__a, __b);
21785 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21786 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
21788 return __builtin_aarch64_ushlv8hi_uus (__a, __b);
21791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21792 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
21794 return __builtin_aarch64_ushlv4si_uus (__a, __b);
21797 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21798 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
21800 return __builtin_aarch64_ushlv2di_uus (__a, __b);
21803 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21804 vshld_s64 (int64_t __a, int64_t __b)
21806 return __builtin_aarch64_sshldi (__a, __b);
21809 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21810 vshld_u64 (uint64_t __a, uint64_t __b)
21812 return __builtin_aarch64_ushldi_uus (__a, __b);
21815 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21816 vshll_high_n_s8 (int8x16_t __a, const int __b)
21818 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
21821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21822 vshll_high_n_s16 (int16x8_t __a, const int __b)
21824 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
21827 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21828 vshll_high_n_s32 (int32x4_t __a, const int __b)
21830 return __builtin_aarch64_sshll2_nv4si (__a, __b);
21833 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21834 vshll_high_n_u8 (uint8x16_t __a, const int __b)
21836 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
21839 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21840 vshll_high_n_u16 (uint16x8_t __a, const int __b)
21842 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
21845 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21846 vshll_high_n_u32 (uint32x4_t __a, const int __b)
21848 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
21851 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21852 vshll_n_s8 (int8x8_t __a, const int __b)
21854 return __builtin_aarch64_sshll_nv8qi (__a, __b);
21857 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21858 vshll_n_s16 (int16x4_t __a, const int __b)
21860 return __builtin_aarch64_sshll_nv4hi (__a, __b);
21863 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21864 vshll_n_s32 (int32x2_t __a, const int __b)
21866 return __builtin_aarch64_sshll_nv2si (__a, __b);
21869 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21870 vshll_n_u8 (uint8x8_t __a, const int __b)
21872 return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
21875 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21876 vshll_n_u16 (uint16x4_t __a, const int __b)
21878 return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
21881 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21882 vshll_n_u32 (uint32x2_t __a, const int __b)
21884 return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
21887 /* vshr */
21889 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21890 vshr_n_s8 (int8x8_t __a, const int __b)
21892 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
21895 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21896 vshr_n_s16 (int16x4_t __a, const int __b)
21898 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
21901 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21902 vshr_n_s32 (int32x2_t __a, const int __b)
21904 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
21907 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21908 vshr_n_s64 (int64x1_t __a, const int __b)
21910 return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
21913 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21914 vshr_n_u8 (uint8x8_t __a, const int __b)
21916 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
21919 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21920 vshr_n_u16 (uint16x4_t __a, const int __b)
21922 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
21925 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21926 vshr_n_u32 (uint32x2_t __a, const int __b)
21928 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
21931 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21932 vshr_n_u64 (uint64x1_t __a, const int __b)
21934 return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
21937 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21938 vshrq_n_s8 (int8x16_t __a, const int __b)
21940 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
21943 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21944 vshrq_n_s16 (int16x8_t __a, const int __b)
21946 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
21949 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21950 vshrq_n_s32 (int32x4_t __a, const int __b)
21952 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
21955 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21956 vshrq_n_s64 (int64x2_t __a, const int __b)
21958 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
21961 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21962 vshrq_n_u8 (uint8x16_t __a, const int __b)
21964 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
21967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21968 vshrq_n_u16 (uint16x8_t __a, const int __b)
21970 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
21973 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21974 vshrq_n_u32 (uint32x4_t __a, const int __b)
21976 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
21979 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21980 vshrq_n_u64 (uint64x2_t __a, const int __b)
21982 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
21985 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21986 vshrd_n_s64 (int64_t __a, const int __b)
21988 return __builtin_aarch64_ashr_simddi (__a, __b);
21991 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21992 vshrd_n_u64 (uint64_t __a, const int __b)
21994 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
21997 /* vsli */
21999 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22000 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22002 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
22005 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22006 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22008 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
22011 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22012 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22014 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
22017 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22018 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22020 return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
22023 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22024 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22026 return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
22029 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22030 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22032 return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
22035 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22036 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22038 return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
22041 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22042 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22044 return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
22047 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22048 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22050 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
22053 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22054 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22056 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
22059 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22060 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22062 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
22065 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22066 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22068 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
22071 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22072 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22074 return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
22077 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22078 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22080 return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
22083 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22084 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22086 return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
22089 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22090 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22092 return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
22095 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22096 vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
22098 return __builtin_aarch64_ssli_ndi (__a, __b, __c);
22101 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22102 vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22104 return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
22107 /* vsqadd */
22109 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22110 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22112 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22115 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22116 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22118 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22121 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22122 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22124 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22127 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22128 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22130 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22133 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22134 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22136 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22139 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22140 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22142 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22145 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22146 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22148 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22151 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22152 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22154 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22157 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
22158 vsqaddb_u8 (uint8_t __a, int8_t __b)
22160 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22163 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
22164 vsqaddh_u16 (uint16_t __a, int16_t __b)
22166 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22169 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
22170 vsqadds_u32 (uint32_t __a, int32_t __b)
22172 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22175 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22176 vsqaddd_u64 (uint64_t __a, int64_t __b)
22178 return __builtin_aarch64_usqadddi_uus (__a, __b);
22181 /* vsqrt */
22182 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22183 vsqrt_f32 (float32x2_t a)
22185 return __builtin_aarch64_sqrtv2sf (a);
22188 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22189 vsqrtq_f32 (float32x4_t a)
22191 return __builtin_aarch64_sqrtv4sf (a);
22194 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
22195 vsqrt_f64 (float64x1_t a)
22197 return (float64x1_t) { __builtin_sqrt (a[0]) };
22200 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22201 vsqrtq_f64 (float64x2_t a)
22203 return __builtin_aarch64_sqrtv2df (a);
22206 /* vsra */
22208 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22209 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22211 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22214 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22215 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22217 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22220 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22221 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22223 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22226 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22227 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22229 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22232 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22233 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22235 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22238 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22239 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22241 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22244 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22245 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22247 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22250 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22251 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22253 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22256 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22257 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22259 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22262 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22263 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22265 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22268 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22269 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22271 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22274 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22275 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22277 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22280 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22281 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22283 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22286 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22287 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22289 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22292 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22293 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22295 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22298 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22299 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22301 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22304 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22305 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22307 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22310 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22311 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22313 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22316 /* vsri */
22318 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22319 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22321 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22324 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22325 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22327 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22330 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22331 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22333 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
22336 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22337 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22339 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
22342 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22343 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22345 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
22348 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22349 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22351 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
22354 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22355 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22357 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
22360 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22361 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22363 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
22366 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22367 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22369 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
22372 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22373 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22375 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
22378 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22379 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22381 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
22384 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22385 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22387 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
22390 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22391 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22393 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
22396 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22397 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22399 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
22402 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22403 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22405 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
22408 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22409 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22411 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
22414 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22415 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
22417 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
22420 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22421 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22423 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
22426 /* vst1 */
22428 __extension__ static __inline void __attribute__ ((__always_inline__))
22429 vst1_f32 (float32_t *a, float32x2_t b)
22431 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22434 __extension__ static __inline void __attribute__ ((__always_inline__))
22435 vst1_f64 (float64_t *a, float64x1_t b)
22437 *a = b[0];
22440 __extension__ static __inline void __attribute__ ((__always_inline__))
22441 vst1_p8 (poly8_t *a, poly8x8_t b)
22443 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22444 (int8x8_t) b);
22447 __extension__ static __inline void __attribute__ ((__always_inline__))
22448 vst1_p16 (poly16_t *a, poly16x4_t b)
22450 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22451 (int16x4_t) b);
22454 __extension__ static __inline void __attribute__ ((__always_inline__))
22455 vst1_s8 (int8_t *a, int8x8_t b)
22457 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22460 __extension__ static __inline void __attribute__ ((__always_inline__))
22461 vst1_s16 (int16_t *a, int16x4_t b)
22463 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22466 __extension__ static __inline void __attribute__ ((__always_inline__))
22467 vst1_s32 (int32_t *a, int32x2_t b)
22469 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22472 __extension__ static __inline void __attribute__ ((__always_inline__))
22473 vst1_s64 (int64_t *a, int64x1_t b)
22475 *a = b[0];
22478 __extension__ static __inline void __attribute__ ((__always_inline__))
22479 vst1_u8 (uint8_t *a, uint8x8_t b)
22481 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22482 (int8x8_t) b);
22485 __extension__ static __inline void __attribute__ ((__always_inline__))
22486 vst1_u16 (uint16_t *a, uint16x4_t b)
22488 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22489 (int16x4_t) b);
22492 __extension__ static __inline void __attribute__ ((__always_inline__))
22493 vst1_u32 (uint32_t *a, uint32x2_t b)
22495 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22496 (int32x2_t) b);
22499 __extension__ static __inline void __attribute__ ((__always_inline__))
22500 vst1_u64 (uint64_t *a, uint64x1_t b)
22502 *a = b[0];
22505 __extension__ static __inline void __attribute__ ((__always_inline__))
22506 vst1q_f32 (float32_t *a, float32x4_t b)
22508 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22511 __extension__ static __inline void __attribute__ ((__always_inline__))
22512 vst1q_f64 (float64_t *a, float64x2_t b)
22514 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22517 /* vst1q */
22519 __extension__ static __inline void __attribute__ ((__always_inline__))
22520 vst1q_p8 (poly8_t *a, poly8x16_t b)
22522 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22523 (int8x16_t) b);
22526 __extension__ static __inline void __attribute__ ((__always_inline__))
22527 vst1q_p16 (poly16_t *a, poly16x8_t b)
22529 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22530 (int16x8_t) b);
22533 __extension__ static __inline void __attribute__ ((__always_inline__))
22534 vst1q_s8 (int8_t *a, int8x16_t b)
22536 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22539 __extension__ static __inline void __attribute__ ((__always_inline__))
22540 vst1q_s16 (int16_t *a, int16x8_t b)
22542 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22545 __extension__ static __inline void __attribute__ ((__always_inline__))
22546 vst1q_s32 (int32_t *a, int32x4_t b)
22548 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22551 __extension__ static __inline void __attribute__ ((__always_inline__))
22552 vst1q_s64 (int64_t *a, int64x2_t b)
22554 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22557 __extension__ static __inline void __attribute__ ((__always_inline__))
22558 vst1q_u8 (uint8_t *a, uint8x16_t b)
22560 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22561 (int8x16_t) b);
22564 __extension__ static __inline void __attribute__ ((__always_inline__))
22565 vst1q_u16 (uint16_t *a, uint16x8_t b)
22567 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22568 (int16x8_t) b);
22571 __extension__ static __inline void __attribute__ ((__always_inline__))
22572 vst1q_u32 (uint32_t *a, uint32x4_t b)
22574 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22575 (int32x4_t) b);
22578 __extension__ static __inline void __attribute__ ((__always_inline__))
22579 vst1q_u64 (uint64_t *a, uint64x2_t b)
22581 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22582 (int64x2_t) b);
22585 /* vstn */
22587 __extension__ static __inline void
22588 vst2_s64 (int64_t * __a, int64x1x2_t val)
22590 __builtin_aarch64_simd_oi __o;
22591 int64x2x2_t temp;
22592 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22593 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22594 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22595 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22596 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22599 __extension__ static __inline void
22600 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
22602 __builtin_aarch64_simd_oi __o;
22603 uint64x2x2_t temp;
22604 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22605 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22606 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22607 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22608 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22611 __extension__ static __inline void
22612 vst2_f64 (float64_t * __a, float64x1x2_t val)
22614 __builtin_aarch64_simd_oi __o;
22615 float64x2x2_t temp;
22616 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22617 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22618 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
22619 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
22620 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
22623 __extension__ static __inline void
22624 vst2_s8 (int8_t * __a, int8x8x2_t val)
22626 __builtin_aarch64_simd_oi __o;
22627 int8x16x2_t temp;
22628 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22629 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22630 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22631 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22632 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22635 __extension__ static __inline void __attribute__ ((__always_inline__))
22636 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
22638 __builtin_aarch64_simd_oi __o;
22639 poly8x16x2_t temp;
22640 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22641 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22642 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22643 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22644 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22647 __extension__ static __inline void __attribute__ ((__always_inline__))
22648 vst2_s16 (int16_t * __a, int16x4x2_t val)
22650 __builtin_aarch64_simd_oi __o;
22651 int16x8x2_t temp;
22652 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22653 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22654 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22655 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22656 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22659 __extension__ static __inline void __attribute__ ((__always_inline__))
22660 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
22662 __builtin_aarch64_simd_oi __o;
22663 poly16x8x2_t temp;
22664 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22665 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22666 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22667 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22668 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22671 __extension__ static __inline void __attribute__ ((__always_inline__))
22672 vst2_s32 (int32_t * __a, int32x2x2_t val)
22674 __builtin_aarch64_simd_oi __o;
22675 int32x4x2_t temp;
22676 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22677 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22678 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22679 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22680 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22683 __extension__ static __inline void __attribute__ ((__always_inline__))
22684 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
22686 __builtin_aarch64_simd_oi __o;
22687 uint8x16x2_t temp;
22688 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22689 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22690 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22691 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22692 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22695 __extension__ static __inline void __attribute__ ((__always_inline__))
22696 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
22698 __builtin_aarch64_simd_oi __o;
22699 uint16x8x2_t temp;
22700 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22701 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22702 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22703 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22704 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22707 __extension__ static __inline void __attribute__ ((__always_inline__))
22708 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
22710 __builtin_aarch64_simd_oi __o;
22711 uint32x4x2_t temp;
22712 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22713 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22714 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22715 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22716 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22719 __extension__ static __inline void __attribute__ ((__always_inline__))
22720 vst2_f32 (float32_t * __a, float32x2x2_t val)
22722 __builtin_aarch64_simd_oi __o;
22723 float32x4x2_t temp;
22724 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22725 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22726 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
22727 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
22728 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
22731 __extension__ static __inline void __attribute__ ((__always_inline__))
22732 vst2q_s8 (int8_t * __a, int8x16x2_t val)
22734 __builtin_aarch64_simd_oi __o;
22735 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22736 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22737 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22740 __extension__ static __inline void __attribute__ ((__always_inline__))
22741 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
22743 __builtin_aarch64_simd_oi __o;
22744 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22745 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22746 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22749 __extension__ static __inline void __attribute__ ((__always_inline__))
22750 vst2q_s16 (int16_t * __a, int16x8x2_t val)
22752 __builtin_aarch64_simd_oi __o;
22753 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22754 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22755 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22758 __extension__ static __inline void __attribute__ ((__always_inline__))
22759 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
22761 __builtin_aarch64_simd_oi __o;
22762 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22763 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22764 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22767 __extension__ static __inline void __attribute__ ((__always_inline__))
22768 vst2q_s32 (int32_t * __a, int32x4x2_t val)
22770 __builtin_aarch64_simd_oi __o;
22771 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22772 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22773 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22776 __extension__ static __inline void __attribute__ ((__always_inline__))
22777 vst2q_s64 (int64_t * __a, int64x2x2_t val)
22779 __builtin_aarch64_simd_oi __o;
22780 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22781 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22782 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22785 __extension__ static __inline void __attribute__ ((__always_inline__))
22786 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
22788 __builtin_aarch64_simd_oi __o;
22789 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22790 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22791 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22794 __extension__ static __inline void __attribute__ ((__always_inline__))
22795 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
22797 __builtin_aarch64_simd_oi __o;
22798 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22799 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22800 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22803 __extension__ static __inline void __attribute__ ((__always_inline__))
22804 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
22806 __builtin_aarch64_simd_oi __o;
22807 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22808 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22809 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22812 __extension__ static __inline void __attribute__ ((__always_inline__))
22813 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
22815 __builtin_aarch64_simd_oi __o;
22816 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22817 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22818 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22821 __extension__ static __inline void __attribute__ ((__always_inline__))
22822 vst2q_f32 (float32_t * __a, float32x4x2_t val)
22824 __builtin_aarch64_simd_oi __o;
22825 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
22826 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
22827 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
22830 __extension__ static __inline void __attribute__ ((__always_inline__))
22831 vst2q_f64 (float64_t * __a, float64x2x2_t val)
22833 __builtin_aarch64_simd_oi __o;
22834 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
22835 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
22836 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
22839 __extension__ static __inline void
22840 vst3_s64 (int64_t * __a, int64x1x3_t val)
22842 __builtin_aarch64_simd_ci __o;
22843 int64x2x3_t temp;
22844 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22845 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22846 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
22847 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22848 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22849 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22850 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22853 __extension__ static __inline void
22854 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
22856 __builtin_aarch64_simd_ci __o;
22857 uint64x2x3_t temp;
22858 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22859 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22860 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
22861 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22862 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22863 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22864 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22867 __extension__ static __inline void
22868 vst3_f64 (float64_t * __a, float64x1x3_t val)
22870 __builtin_aarch64_simd_ci __o;
22871 float64x2x3_t temp;
22872 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22873 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22874 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
22875 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
22876 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
22877 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
22878 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
22881 __extension__ static __inline void
22882 vst3_s8 (int8_t * __a, int8x8x3_t val)
22884 __builtin_aarch64_simd_ci __o;
22885 int8x16x3_t temp;
22886 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22887 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22888 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
22889 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22890 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22891 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22892 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22895 __extension__ static __inline void __attribute__ ((__always_inline__))
22896 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
22898 __builtin_aarch64_simd_ci __o;
22899 poly8x16x3_t temp;
22900 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22901 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22902 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
22903 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22904 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22905 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22906 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22909 __extension__ static __inline void __attribute__ ((__always_inline__))
22910 vst3_s16 (int16_t * __a, int16x4x3_t val)
22912 __builtin_aarch64_simd_ci __o;
22913 int16x8x3_t temp;
22914 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22915 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22916 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
22917 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22918 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22919 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22920 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22923 __extension__ static __inline void __attribute__ ((__always_inline__))
22924 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
22926 __builtin_aarch64_simd_ci __o;
22927 poly16x8x3_t temp;
22928 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22929 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22930 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
22931 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22932 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22933 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22934 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22937 __extension__ static __inline void __attribute__ ((__always_inline__))
22938 vst3_s32 (int32_t * __a, int32x2x3_t val)
22940 __builtin_aarch64_simd_ci __o;
22941 int32x4x3_t temp;
22942 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22943 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22944 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
22945 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22946 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22947 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
22948 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
22951 __extension__ static __inline void __attribute__ ((__always_inline__))
22952 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
22954 __builtin_aarch64_simd_ci __o;
22955 uint8x16x3_t temp;
22956 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22957 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22958 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
22959 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22960 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22961 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22962 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22965 __extension__ static __inline void __attribute__ ((__always_inline__))
22966 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
22968 __builtin_aarch64_simd_ci __o;
22969 uint16x8x3_t temp;
22970 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22971 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22972 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
22973 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22974 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22975 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22976 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22979 __extension__ static __inline void __attribute__ ((__always_inline__))
22980 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
22982 __builtin_aarch64_simd_ci __o;
22983 uint32x4x3_t temp;
22984 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22985 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22986 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
22987 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22988 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22989 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
22990 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
22993 __extension__ static __inline void __attribute__ ((__always_inline__))
22994 vst3_f32 (float32_t * __a, float32x2x3_t val)
22996 __builtin_aarch64_simd_ci __o;
22997 float32x4x3_t temp;
22998 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22999 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23000 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23001 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23002 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23003 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23004 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23007 __extension__ static __inline void __attribute__ ((__always_inline__))
23008 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23010 __builtin_aarch64_simd_ci __o;
23011 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23012 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23013 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23014 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23017 __extension__ static __inline void __attribute__ ((__always_inline__))
23018 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23020 __builtin_aarch64_simd_ci __o;
23021 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23022 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23023 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23024 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23027 __extension__ static __inline void __attribute__ ((__always_inline__))
23028 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23030 __builtin_aarch64_simd_ci __o;
23031 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23032 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23033 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23034 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23037 __extension__ static __inline void __attribute__ ((__always_inline__))
23038 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23040 __builtin_aarch64_simd_ci __o;
23041 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23042 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23043 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23044 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23047 __extension__ static __inline void __attribute__ ((__always_inline__))
23048 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23050 __builtin_aarch64_simd_ci __o;
23051 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23052 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23053 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23054 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23057 __extension__ static __inline void __attribute__ ((__always_inline__))
23058 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23060 __builtin_aarch64_simd_ci __o;
23061 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23062 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23063 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23064 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23067 __extension__ static __inline void __attribute__ ((__always_inline__))
23068 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23070 __builtin_aarch64_simd_ci __o;
23071 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23072 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23073 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23074 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23077 __extension__ static __inline void __attribute__ ((__always_inline__))
23078 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23080 __builtin_aarch64_simd_ci __o;
23081 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23082 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23083 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23084 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23087 __extension__ static __inline void __attribute__ ((__always_inline__))
23088 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23090 __builtin_aarch64_simd_ci __o;
23091 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23092 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23093 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23094 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23097 __extension__ static __inline void __attribute__ ((__always_inline__))
23098 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23100 __builtin_aarch64_simd_ci __o;
23101 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23102 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23103 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23104 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23107 __extension__ static __inline void __attribute__ ((__always_inline__))
23108 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23110 __builtin_aarch64_simd_ci __o;
23111 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23112 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23113 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23114 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23117 __extension__ static __inline void __attribute__ ((__always_inline__))
23118 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23120 __builtin_aarch64_simd_ci __o;
23121 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23122 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23123 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23124 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23127 __extension__ static __inline void
23128 vst4_s64 (int64_t * __a, int64x1x4_t val)
23130 __builtin_aarch64_simd_xi __o;
23131 int64x2x4_t temp;
23132 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23133 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23134 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23135 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23136 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23137 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23138 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23139 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23140 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23143 __extension__ static __inline void
23144 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23146 __builtin_aarch64_simd_xi __o;
23147 uint64x2x4_t temp;
23148 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23149 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23150 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23151 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23152 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23153 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23154 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23155 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23156 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23159 __extension__ static __inline void
23160 vst4_f64 (float64_t * __a, float64x1x4_t val)
23162 __builtin_aarch64_simd_xi __o;
23163 float64x2x4_t temp;
23164 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23165 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23166 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23167 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23168 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23169 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23170 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23171 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23172 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23175 __extension__ static __inline void
23176 vst4_s8 (int8_t * __a, int8x8x4_t val)
23178 __builtin_aarch64_simd_xi __o;
23179 int8x16x4_t temp;
23180 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23181 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23182 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23183 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23184 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23185 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23186 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23187 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23188 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23191 __extension__ static __inline void __attribute__ ((__always_inline__))
23192 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23194 __builtin_aarch64_simd_xi __o;
23195 poly8x16x4_t temp;
23196 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23197 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23198 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23199 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23200 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23201 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23202 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23203 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23204 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23207 __extension__ static __inline void __attribute__ ((__always_inline__))
23208 vst4_s16 (int16_t * __a, int16x4x4_t val)
23210 __builtin_aarch64_simd_xi __o;
23211 int16x8x4_t temp;
23212 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23213 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23214 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23215 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23216 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23217 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23218 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23219 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23220 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23223 __extension__ static __inline void __attribute__ ((__always_inline__))
23224 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23226 __builtin_aarch64_simd_xi __o;
23227 poly16x8x4_t temp;
23228 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23229 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23230 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23231 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23232 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23233 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23234 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23235 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23236 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23239 __extension__ static __inline void __attribute__ ((__always_inline__))
23240 vst4_s32 (int32_t * __a, int32x2x4_t val)
23242 __builtin_aarch64_simd_xi __o;
23243 int32x4x4_t temp;
23244 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23245 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23246 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23247 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23248 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23249 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23250 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23251 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23252 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23255 __extension__ static __inline void __attribute__ ((__always_inline__))
23256 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23258 __builtin_aarch64_simd_xi __o;
23259 uint8x16x4_t temp;
23260 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23261 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23262 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23263 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23264 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23265 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23266 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23267 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23268 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* vst4_u16 / vst4_u32 / vst4_f32: store four 64-bit D-register vectors
   to memory with 4-way lane interleaving (A64 ST4).  The st4 builtins
   operate on an XI tuple of 128-bit Q registers, so each 64-bit input
   is first widened by combining it with a zero upper half; only the
   low halves are actually written to memory.  __o is filled in field
   by field by the set_qreg builtins before the store.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u16 (uint16_t * __a, uint16x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint16x8x4_t temp;
  /* Widen each D-register operand to a Q register with a zero top half.  */
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
  /* The builtins take the signed element type; the casts do not change
     the stored bytes.  */
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
  __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_u32 (uint32_t * __a, uint32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  uint32x4x4_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
/* vst4q_*: store four 128-bit Q-register vectors with 4-way lane
   interleaving (A64 ST4).  The four vectors are packed into an XI
   tuple and handed to the element-width-specific st4 builtin.
   Unsigned, poly and float inputs are cast to the builtin's element
   type; the casts do not change the stored bytes.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
/* vsub: scalar 64-bit subtraction intrinsics.  These lower to a plain
   subtract; the unsigned variant wraps modulo 2^64.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  int64_t __difference = __a - __b;
  return __difference;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __difference = __a - __b;
  return __difference;
}
/* vtbx1 */

/* vtbx1: single-register table extension lookup.  A64 TBL (via vtbl1)
   writes zero for out-of-range indexes, whereas the ARMv7 VTBX
   semantics these intrinsics provide leave the destination lane
   unchanged.  Emulate that by building a mask of in-range lanes
   (index < 8) and selecting between the table result and __r.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
  /* All-ones in lanes whose index selects a valid table element.  */
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (8));
  int8x8_t __tbl = vtbl1_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtbx3 */

/* vtbx3: three-register table extension lookup.  Same out-of-range
   handling as vtbx1, but the table holds 3 x 8 = 24 entries, so the
   in-range mask is (index < 24).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (24));
  int8x8_t __tbl = vtbl3_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtrn */

/* vtrn1: transpose, even lanes (A64 TRN1).  The result interleaves
   the even-numbered lanes of __a and __b.  Each intrinsic carries two
   permute masks because GCC's vector lane numbering is
   endian-dependent; the __AARCH64EB__ masks are the big-endian
   mirror of the little-endian ones.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vtrn2: transpose, odd lanes (A64 TRN2).  The result interleaves the
   odd-numbered lanes of __a and __b.  As with vtrn1, the permute
   masks come in endian-specific pairs.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
/* vtrn / vtrnq: full two-vector transpose.  Each returns both halves
   of the transpose as a pair: val[0] is the even-lane interleave
   (vtrn1) and val[1] the odd-lane interleave (vtrn2).  */

__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
  return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
  return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
  return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
  return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
  return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}

__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
  return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
  return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
  return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
  return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}
/* vtst */

/* vtst: per-lane bit test.  Each result lane is all-ones when
   (__a & __b) has any bit set in that lane, all-zeros otherwise.
   GCC vector comparisons already produce -1/0 per lane, so
   ((__a & __b) != 0) yields the mask directly; the signed variants
   cast that result to the unsigned return type.  The single-lane
   64-bit variants are computed with scalar code on lane 0.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  /* 64-bit literal zero so the comparison is done at element width.  */
  return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return ((__a & __b) != __AARCH64_UINT64_C (0));
}
/* Scalar bit test: all-ones when __a and __b share any set bit,
   zero otherwise.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  if ((__a & __b) != 0)
    return -1ll;
  return 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  if ((__a & __b) != 0)
    return -1ll;
  return 0ll;
}
/* vuqadd */

/* vuqadd: signed saturating add of an unsigned operand (A64 SUQADD),
   via the type-suffixed _ssu builtins (signed result, signed +
   unsigned operands).  The 64x1 D-register variant extracts lane 0,
   uses the scalar builtin, and rebuilds the vector.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
}

/* Scalar variants operating on a single 8/16/32/64-bit value.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8_t __a, uint8_t __b)
{
  return __builtin_aarch64_suqaddqi_ssu (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16_t __a, uint16_t __b)
{
  return __builtin_aarch64_suqaddhi_ssu (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32_t __a, uint32_t __b)
{
  return __builtin_aarch64_suqaddsi_ssu (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64_t __a, uint64_t __b)
{
  return __builtin_aarch64_suqadddi_ssu (__a, __b);
}
/* __DEFINTERLEAVE builds a full two-result interleave intrinsic (e.g.
   vzip_s8, vuzpq_u32) out of its part-1/part-2 halves: the generated
   v<op><Q>_<suffix> returns an xN_x2_t pair whose .val[0]/.val[1] are
   v<op>1<Q>_<suffix> and v<op>2<Q>_<suffix> of the same operands.
   Note the deliberately un-reserved parameter names a/b: this is a
   macro body, so they must not collide with user code via __a/__b
   conventions used elsewhere.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }

/* Instantiate __DEFINTERLEAVE for every element type, in both the
   64-bit (empty Q) and 128-bit (Q = q) register widths.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp */

/* vuzp1 - unzip, part 1: from the concatenation {__a, __b}, select the
   even-numbered architectural lanes (little-endian masks {0, 2, 4, ...}).
   Separate big-endian masks pick the same architectural lanes, since
   GCC's __builtin_shuffle indices follow the reversed in-register lane
   numbering used on big-endian AArch64.  NOTE(review): these exact
   masks are presumably pattern-matched by the backend into single
   UZP1 instructions - do not alter them.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

/* 128-bit (Q-register) variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vuzp2 - unzip, part 2: from the concatenation {__a, __b}, select the
   odd-numbered architectural lanes (little-endian masks {1, 3, 5, ...}).
   Big-endian masks address the same architectural lanes under the
   reversed in-register numbering; see the vuzp1 comment.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

/* 128-bit (Q-register) variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

/* Generate the combined vuzp/vuzpq intrinsics from the part-1/part-2
   functions above.  */
__INTERLEAVE_LIST (uzp)
/* vzip */

/* vzip1 - interleave the low halves of __a and __b
   ({a0, b0, a1, b1, ...}, architectural ZIP1 lane order; little-endian
   masks {0, N, 1, N+1, ...}).  As with vuzp, the big-endian masks pick
   the same architectural lanes under the reversed in-register
   numbering, and the exact masks must not be altered.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

/* 128-bit (Q-register) variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vzip2 - interleave the high halves of __a and __b
   ({a(N/2), b(N/2), ...}, architectural ZIP2 lane order; little-endian
   masks {N/2, N + N/2, ...}).  Big-endian masks address the same
   architectural lanes under the reversed in-register numbering; see
   the vzip1 comment.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

/* 128-bit (Q-register) variants.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

/* Generate the combined vzip/vzipq intrinsics, then retire the
   generator macros - they are internal to this header.  */
__INTERLEAVE_LIST (zip)

#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE
25161 /* End of optimal implementations in approved order. */
25163 #undef __aarch64_vget_lane_any
25165 #undef __aarch64_vdup_lane_any
25166 #undef __aarch64_vdup_lane_f32
25167 #undef __aarch64_vdup_lane_f64
25168 #undef __aarch64_vdup_lane_p8
25169 #undef __aarch64_vdup_lane_p16
25170 #undef __aarch64_vdup_lane_s8
25171 #undef __aarch64_vdup_lane_s16
25172 #undef __aarch64_vdup_lane_s32
25173 #undef __aarch64_vdup_lane_s64
25174 #undef __aarch64_vdup_lane_u8
25175 #undef __aarch64_vdup_lane_u16
25176 #undef __aarch64_vdup_lane_u32
25177 #undef __aarch64_vdup_lane_u64
25178 #undef __aarch64_vdup_laneq_f32
25179 #undef __aarch64_vdup_laneq_f64
25180 #undef __aarch64_vdup_laneq_p8
25181 #undef __aarch64_vdup_laneq_p16
25182 #undef __aarch64_vdup_laneq_s8
25183 #undef __aarch64_vdup_laneq_s16
25184 #undef __aarch64_vdup_laneq_s32
25185 #undef __aarch64_vdup_laneq_s64
25186 #undef __aarch64_vdup_laneq_u8
25187 #undef __aarch64_vdup_laneq_u16
25188 #undef __aarch64_vdup_laneq_u32
25189 #undef __aarch64_vdup_laneq_u64
25190 #undef __aarch64_vdupq_lane_f32
25191 #undef __aarch64_vdupq_lane_f64
25192 #undef __aarch64_vdupq_lane_p8
25193 #undef __aarch64_vdupq_lane_p16
25194 #undef __aarch64_vdupq_lane_s8
25195 #undef __aarch64_vdupq_lane_s16
25196 #undef __aarch64_vdupq_lane_s32
25197 #undef __aarch64_vdupq_lane_s64
25198 #undef __aarch64_vdupq_lane_u8
25199 #undef __aarch64_vdupq_lane_u16
25200 #undef __aarch64_vdupq_lane_u32
25201 #undef __aarch64_vdupq_lane_u64
25202 #undef __aarch64_vdupq_laneq_f32
25203 #undef __aarch64_vdupq_laneq_f64
25204 #undef __aarch64_vdupq_laneq_p8
25205 #undef __aarch64_vdupq_laneq_p16
25206 #undef __aarch64_vdupq_laneq_s8
25207 #undef __aarch64_vdupq_laneq_s16
25208 #undef __aarch64_vdupq_laneq_s32
25209 #undef __aarch64_vdupq_laneq_s64
25210 #undef __aarch64_vdupq_laneq_u8
25211 #undef __aarch64_vdupq_laneq_u16
25212 #undef __aarch64_vdupq_laneq_u32
25213 #undef __aarch64_vdupq_laneq_u64
25215 #endif
25217 #endif