/* Source: gcc/config/aarch64/arm_neon.h from the official GCC tree.
   Commit: "[AArch64] Remove be_checked_get_lane, check bounds with
   __builtin_aarch64_im_lane_boundsi."  */
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

#include <stdint.h>

/* Build 64-bit integer constants with the exact stdint type expected
   by the 64-bit intrinsics.  */
#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)
/* 64-bit ("D" register) vector types, built on GCC's internal
   fixed-size vector types.  */
typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
typedef __Uint8x8_t uint8x8_t;
typedef __Uint16x4_t uint16x4_t;
typedef __Uint32x2_t uint32x2_t;
typedef __Float64x1_t float64x1_t;
typedef __Uint64x1_t uint64x1_t;
/* 128-bit ("Q" register) vector types.  */
typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
typedef __Uint64x2_t uint64x2_t;

/* Scalar polynomial element types.  */
typedef __Poly8_t poly8_t;
typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;

/* Scalar floating-point element types.  */
typedef float float32_t;
typedef double float64_t;
/* Two-vector tuple types, used by the vld2/vst2-style intrinsics and
   friends.  Each wraps an array of two vectors of the base type.  */
typedef struct int8x8x2_t { int8x8_t val[2]; } int8x8x2_t;
typedef struct int8x16x2_t { int8x16_t val[2]; } int8x16x2_t;
typedef struct int16x4x2_t { int16x4_t val[2]; } int16x4x2_t;
typedef struct int16x8x2_t { int16x8_t val[2]; } int16x8x2_t;
typedef struct int32x2x2_t { int32x2_t val[2]; } int32x2x2_t;
typedef struct int32x4x2_t { int32x4_t val[2]; } int32x4x2_t;
typedef struct int64x1x2_t { int64x1_t val[2]; } int64x1x2_t;
typedef struct int64x2x2_t { int64x2_t val[2]; } int64x2x2_t;
typedef struct uint8x8x2_t { uint8x8_t val[2]; } uint8x8x2_t;
typedef struct uint8x16x2_t { uint8x16_t val[2]; } uint8x16x2_t;
typedef struct uint16x4x2_t { uint16x4_t val[2]; } uint16x4x2_t;
typedef struct uint16x8x2_t { uint16x8_t val[2]; } uint16x8x2_t;
typedef struct uint32x2x2_t { uint32x2_t val[2]; } uint32x2x2_t;
typedef struct uint32x4x2_t { uint32x4_t val[2]; } uint32x4x2_t;
typedef struct uint64x1x2_t { uint64x1_t val[2]; } uint64x1x2_t;
typedef struct uint64x2x2_t { uint64x2_t val[2]; } uint64x2x2_t;
typedef struct float32x2x2_t { float32x2_t val[2]; } float32x2x2_t;
typedef struct float32x4x2_t { float32x4_t val[2]; } float32x4x2_t;
typedef struct float64x2x2_t { float64x2_t val[2]; } float64x2x2_t;
typedef struct float64x1x2_t { float64x1_t val[2]; } float64x1x2_t;
typedef struct poly8x8x2_t { poly8x8_t val[2]; } poly8x8x2_t;
typedef struct poly8x16x2_t { poly8x16_t val[2]; } poly8x16x2_t;
typedef struct poly16x4x2_t { poly16x4_t val[2]; } poly16x4x2_t;
typedef struct poly16x8x2_t { poly16x8_t val[2]; } poly16x8x2_t;
/* Three-vector tuple types (vld3/vst3-style intrinsics).  Each wraps
   an array of three vectors of the base type.  */
typedef struct int8x8x3_t { int8x8_t val[3]; } int8x8x3_t;
typedef struct int8x16x3_t { int8x16_t val[3]; } int8x16x3_t;
typedef struct int16x4x3_t { int16x4_t val[3]; } int16x4x3_t;
typedef struct int16x8x3_t { int16x8_t val[3]; } int16x8x3_t;
typedef struct int32x2x3_t { int32x2_t val[3]; } int32x2x3_t;
typedef struct int32x4x3_t { int32x4_t val[3]; } int32x4x3_t;
typedef struct int64x1x3_t { int64x1_t val[3]; } int64x1x3_t;
typedef struct int64x2x3_t { int64x2_t val[3]; } int64x2x3_t;
typedef struct uint8x8x3_t { uint8x8_t val[3]; } uint8x8x3_t;
typedef struct uint8x16x3_t { uint8x16_t val[3]; } uint8x16x3_t;
typedef struct uint16x4x3_t { uint16x4_t val[3]; } uint16x4x3_t;
typedef struct uint16x8x3_t { uint16x8_t val[3]; } uint16x8x3_t;
typedef struct uint32x2x3_t { uint32x2_t val[3]; } uint32x2x3_t;
typedef struct uint32x4x3_t { uint32x4_t val[3]; } uint32x4x3_t;
typedef struct uint64x1x3_t { uint64x1_t val[3]; } uint64x1x3_t;
typedef struct uint64x2x3_t { uint64x2_t val[3]; } uint64x2x3_t;
typedef struct float32x2x3_t { float32x2_t val[3]; } float32x2x3_t;
typedef struct float32x4x3_t { float32x4_t val[3]; } float32x4x3_t;
typedef struct float64x2x3_t { float64x2_t val[3]; } float64x2x3_t;
typedef struct float64x1x3_t { float64x1_t val[3]; } float64x1x3_t;
typedef struct poly8x8x3_t { poly8x8_t val[3]; } poly8x8x3_t;
typedef struct poly8x16x3_t { poly8x16_t val[3]; } poly8x16x3_t;
typedef struct poly16x4x3_t { poly16x4_t val[3]; } poly16x4x3_t;
typedef struct poly16x8x3_t { poly16x8_t val[3]; } poly16x8x3_t;
/* Four-vector tuple types (vld4/vst4-style intrinsics).  Each wraps
   an array of four vectors of the base type.  */
typedef struct int8x8x4_t { int8x8_t val[4]; } int8x8x4_t;
typedef struct int8x16x4_t { int8x16_t val[4]; } int8x16x4_t;
typedef struct int16x4x4_t { int16x4_t val[4]; } int16x4x4_t;
typedef struct int16x8x4_t { int16x8_t val[4]; } int16x8x4_t;
typedef struct int32x2x4_t { int32x2_t val[4]; } int32x2x4_t;
typedef struct int32x4x4_t { int32x4_t val[4]; } int32x4x4_t;
typedef struct int64x1x4_t { int64x1_t val[4]; } int64x1x4_t;
typedef struct int64x2x4_t { int64x2_t val[4]; } int64x2x4_t;
typedef struct uint8x8x4_t { uint8x8_t val[4]; } uint8x8x4_t;
typedef struct uint8x16x4_t { uint8x16_t val[4]; } uint8x16x4_t;
typedef struct uint16x4x4_t { uint16x4_t val[4]; } uint16x4x4_t;
typedef struct uint16x8x4_t { uint16x8_t val[4]; } uint16x8x4_t;
typedef struct uint32x2x4_t { uint32x2_t val[4]; } uint32x2x4_t;
typedef struct uint32x4x4_t { uint32x4_t val[4]; } uint32x4x4_t;
typedef struct uint64x1x4_t { uint64x1_t val[4]; } uint64x1x4_t;
typedef struct uint64x2x4_t { uint64x2_t val[4]; } uint64x2x4_t;
typedef struct float32x2x4_t { float32x2_t val[4]; } float32x2x4_t;
typedef struct float32x4x4_t { float32x4_t val[4]; } float32x4x4_t;
typedef struct float64x2x4_t { float64x2_t val[4]; } float64x2x4_t;
typedef struct float64x1x4_t { float64x1_t val[4]; } float64x1x4_t;
typedef struct poly8x8x4_t { poly8x8_t val[4]; } poly8x8x4_t;
typedef struct poly8x16x4_t { poly8x16_t val[4]; } poly8x16x4_t;
typedef struct poly16x4x4_t { poly16x4_t val[4]; } poly16x4x4_t;
typedef struct poly16x8x4_t { poly16x8_t val[4]; } poly16x8x4_t;
/* __aarch64_vdup_lane internal macros.
   Broadcast lane __b of vector __a into every lane of a new vector:
   extract the element (with lane-bounds checking) and hand it to the
   matching vdup[q]_n_* intrinsic.  __size selects the element suffix,
   __q selects the 64-bit ("") or 128-bit ("q") result width.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))

/* 64-bit-source, 64-bit-result variants.  */
#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)
/* __aarch64_vdup_laneq internal macros.
   128-bit-source, 64-bit-result variants.  The expansion is the same
   as the non-q lane macros: the element extraction and its bounds
   check adapt to the source vector's type, and the empty __q argument
   selects the 64-bit vdup_n_* result.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , __a, __b)
/* __aarch64_vdupq_lane internal macros.
   64-bit-source, 128-bit-result variants: the "q" argument selects
   the vdupq_n_* intrinsic.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)
/* __aarch64_vdupq_laneq internal macros.
   128-bit-source, 128-bit-result variants.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, __a, __b)
/* Internal macro for lane indices.  */

/* Number of lanes in vector __v, derived from its size.  */
#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))

/* Bounds-check the lane index __idx against __vec's lane count via
   the target builtin (per this change's purpose: replacing the old
   be_checked_get_lane bounds checking).  */
#define __AARCH64_LANE_CHECK(__vec, __idx) \
  __builtin_aarch64_im_lane_boundsi (__AARCH64_NUM_LANES (__vec), __idx)

/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif
/* vget_lane internal macro.
   Statement expression that bounds-checks __index, maps it through
   __aarch64_lane (endianness correction), and yields the element.
   Defect fixed: both statement expressions below were missing their
   closing "})", leaving the "({" unbalanced and the header unusable.  */
#define __aarch64_vget_lane_any(__vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)]; \
  })

/* vset_lane and vld1_lane internal macro.
   Same checking/mapping as above, then stores __elem into the lane
   and yields the updated vector as the expression's value.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index) \
  __extension__ \
  ({ \
    __AARCH64_LANE_CHECK (__vec, __index); \
    __vec[__aarch64_lane (__vec, __index)] = __elem; \
    __vec; \
  })
/* vadd — lane-wise addition, expressed with GCC's generic vector
   arithmetic (no target builtin needed).  64-bit forms first, then
   the 128-bit "q" forms.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}
/* vaddl/vaddw — widening additions via the (s|u)addl and (s|u)addw
   builtins.  Result lanes are twice the width of the narrow operand's
   lanes, as the signatures show.  The "_high" forms use the ...2
   builtins on full 128-bit narrow inputs.  Unsigned variants
   reinterpret their arguments to the signed types the builtins take;
   the lane bit patterns are unchanged, and the result is cast back.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

/* vaddw — wide operand __a keeps its width; only __b is widened.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
						    (int32x4_t) __b);
}
/* vhadd/vrhadd — halving (and rounding-halving, "r" prefix) addition
   via the (s|u)hadd and (s|u)rhadd builtins, per the ACLE intrinsic
   naming.  Unsigned variants reinterpret through the signed builtin
   parameter types; lane bit patterns are unchanged.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}
/* vaddhn/vraddhn — narrowing ("high half", with optional rounding)
   addition via the addhn/raddhn builtins; result lanes are half the
   input width, as the signatures show.  The "_high" forms use the
   ...2 builtins and take an extra narrow vector __a along with the
   two wide operands, producing a full 128-bit result.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
						    (int16x8_t) __b,
						    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
						    (int32x4_t) __b,
						    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
						    (int64x2_t) __b,
						    (int64x2_t) __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
						     (int16x8_t) __b,
						     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
						     (int32x4_t) __b,
						     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
						     (int64x2_t) __b,
						     (int64x2_t) __c);
}
/* vdiv — lane-wise floating-point division, expressed with GCC's
   generic vector arithmetic.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}
1186 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1187 vmul_s8 (int8x8_t __a, int8x8_t __b)
1189 return __a * __b;
1192 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1193 vmul_s16 (int16x4_t __a, int16x4_t __b)
1195 return __a * __b;
1198 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1199 vmul_s32 (int32x2_t __a, int32x2_t __b)
1201 return __a * __b;
1204 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1205 vmul_f32 (float32x2_t __a, float32x2_t __b)
1207 return __a * __b;
1210 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1211 vmul_f64 (float64x1_t __a, float64x1_t __b)
1213 return __a * __b;
1216 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1217 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1219 return __a * __b;
1222 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1223 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1225 return __a * __b;
1228 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1229 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1231 return __a * __b;
1234 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1235 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1237 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1238 (int8x8_t) __b);
1241 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1242 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1244 return __a * __b;
1247 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1248 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1250 return __a * __b;
1253 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1254 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1256 return __a * __b;
1259 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1260 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1262 return __a * __b;
1265 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1266 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1268 return __a * __b;
1271 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1272 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1274 return __a * __b;
1277 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1278 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1280 return __a * __b;
1283 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1284 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1286 return __a * __b;
1289 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1290 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1292 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1293 (int8x16_t) __b);
1296 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1297 vand_s8 (int8x8_t __a, int8x8_t __b)
1299 return __a & __b;
1302 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1303 vand_s16 (int16x4_t __a, int16x4_t __b)
1305 return __a & __b;
1308 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1309 vand_s32 (int32x2_t __a, int32x2_t __b)
1311 return __a & __b;
1314 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1315 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1317 return __a & __b;
1320 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1321 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1323 return __a & __b;
1326 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1327 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1329 return __a & __b;
1332 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1333 vand_s64 (int64x1_t __a, int64x1_t __b)
1335 return __a & __b;
1338 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1339 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1341 return __a & __b;
1344 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1345 vandq_s8 (int8x16_t __a, int8x16_t __b)
1347 return __a & __b;
1350 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1351 vandq_s16 (int16x8_t __a, int16x8_t __b)
1353 return __a & __b;
1356 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1357 vandq_s32 (int32x4_t __a, int32x4_t __b)
1359 return __a & __b;
1362 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1363 vandq_s64 (int64x2_t __a, int64x2_t __b)
1365 return __a & __b;
1368 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1369 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1371 return __a & __b;
1374 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1375 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1377 return __a & __b;
1380 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1381 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1383 return __a & __b;
1386 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1387 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1389 return __a & __b;
1392 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1393 vorr_s8 (int8x8_t __a, int8x8_t __b)
1395 return __a | __b;
1398 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1399 vorr_s16 (int16x4_t __a, int16x4_t __b)
1401 return __a | __b;
1404 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1405 vorr_s32 (int32x2_t __a, int32x2_t __b)
1407 return __a | __b;
1410 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1411 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1413 return __a | __b;
1416 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1417 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1419 return __a | __b;
1422 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1423 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1425 return __a | __b;
1428 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1429 vorr_s64 (int64x1_t __a, int64x1_t __b)
1431 return __a | __b;
1434 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1435 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1437 return __a | __b;
1440 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1441 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1443 return __a | __b;
1446 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1447 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1449 return __a | __b;
1452 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1453 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1455 return __a | __b;
1458 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1459 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1461 return __a | __b;
1464 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1465 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1467 return __a | __b;
1470 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1471 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1473 return __a | __b;
1476 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1477 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1479 return __a | __b;
1482 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1483 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1485 return __a | __b;
1488 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1489 veor_s8 (int8x8_t __a, int8x8_t __b)
1491 return __a ^ __b;
1494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1495 veor_s16 (int16x4_t __a, int16x4_t __b)
1497 return __a ^ __b;
1500 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1501 veor_s32 (int32x2_t __a, int32x2_t __b)
1503 return __a ^ __b;
1506 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1507 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1509 return __a ^ __b;
1512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1513 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1515 return __a ^ __b;
1518 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1519 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1521 return __a ^ __b;
1524 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1525 veor_s64 (int64x1_t __a, int64x1_t __b)
1527 return __a ^ __b;
1530 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1531 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1533 return __a ^ __b;
1536 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1537 veorq_s8 (int8x16_t __a, int8x16_t __b)
1539 return __a ^ __b;
1542 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1543 veorq_s16 (int16x8_t __a, int16x8_t __b)
1545 return __a ^ __b;
1548 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1549 veorq_s32 (int32x4_t __a, int32x4_t __b)
1551 return __a ^ __b;
1554 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1555 veorq_s64 (int64x2_t __a, int64x2_t __b)
1557 return __a ^ __b;
1560 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1561 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1563 return __a ^ __b;
1566 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1567 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1569 return __a ^ __b;
1572 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1573 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1575 return __a ^ __b;
1578 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1579 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1581 return __a ^ __b;
1584 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1585 vbic_s8 (int8x8_t __a, int8x8_t __b)
1587 return __a & ~__b;
1590 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1591 vbic_s16 (int16x4_t __a, int16x4_t __b)
1593 return __a & ~__b;
1596 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1597 vbic_s32 (int32x2_t __a, int32x2_t __b)
1599 return __a & ~__b;
1602 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1603 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1605 return __a & ~__b;
1608 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1609 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1611 return __a & ~__b;
1614 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1615 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1617 return __a & ~__b;
1620 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1621 vbic_s64 (int64x1_t __a, int64x1_t __b)
1623 return __a & ~__b;
1626 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1627 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1629 return __a & ~__b;
1632 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1633 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1635 return __a & ~__b;
1638 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1639 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1641 return __a & ~__b;
1644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1645 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1647 return __a & ~__b;
1650 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1651 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1653 return __a & ~__b;
1656 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1657 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1659 return __a & ~__b;
1662 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1663 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1665 return __a & ~__b;
1668 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1669 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1671 return __a & ~__b;
1674 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1675 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1677 return __a & ~__b;
1680 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1681 vorn_s8 (int8x8_t __a, int8x8_t __b)
1683 return __a | ~__b;
1686 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1687 vorn_s16 (int16x4_t __a, int16x4_t __b)
1689 return __a | ~__b;
1692 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1693 vorn_s32 (int32x2_t __a, int32x2_t __b)
1695 return __a | ~__b;
1698 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1699 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1701 return __a | ~__b;
1704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1705 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1707 return __a | ~__b;
1710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1711 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1713 return __a | ~__b;
1716 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1717 vorn_s64 (int64x1_t __a, int64x1_t __b)
1719 return __a | ~__b;
1722 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1723 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1725 return __a | ~__b;
1728 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1729 vornq_s8 (int8x16_t __a, int8x16_t __b)
1731 return __a | ~__b;
1734 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1735 vornq_s16 (int16x8_t __a, int16x8_t __b)
1737 return __a | ~__b;
1740 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1741 vornq_s32 (int32x4_t __a, int32x4_t __b)
1743 return __a | ~__b;
1746 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1747 vornq_s64 (int64x2_t __a, int64x2_t __b)
1749 return __a | ~__b;
1752 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1753 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1755 return __a | ~__b;
1758 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1759 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1761 return __a | ~__b;
1764 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1765 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1767 return __a | ~__b;
1770 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1771 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1773 return __a | ~__b;
1776 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1777 vsub_s8 (int8x8_t __a, int8x8_t __b)
1779 return __a - __b;
1782 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1783 vsub_s16 (int16x4_t __a, int16x4_t __b)
1785 return __a - __b;
1788 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1789 vsub_s32 (int32x2_t __a, int32x2_t __b)
1791 return __a - __b;
1794 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1795 vsub_f32 (float32x2_t __a, float32x2_t __b)
1797 return __a - __b;
1800 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1801 vsub_f64 (float64x1_t __a, float64x1_t __b)
1803 return __a - __b;
1806 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1807 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1809 return __a - __b;
1812 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1813 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1815 return __a - __b;
1818 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1819 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1821 return __a - __b;
1824 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1825 vsub_s64 (int64x1_t __a, int64x1_t __b)
1827 return __a - __b;
1830 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1831 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1833 return __a - __b;
1836 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1837 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1839 return __a - __b;
1842 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1843 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1845 return __a - __b;
1848 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1849 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1851 return __a - __b;
1854 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1855 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1857 return __a - __b;
1860 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1861 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1863 return __a - __b;
1866 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1867 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1869 return __a - __b;
1872 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1873 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1875 return __a - __b;
1878 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1879 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1881 return __a - __b;
1884 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1885 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1887 return __a - __b;
1890 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1891 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1893 return __a - __b;
1896 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1897 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1899 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1902 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1903 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1905 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1908 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1909 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1911 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1914 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1915 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1917 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1918 (int8x8_t) __b);
1921 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1922 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1924 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1925 (int16x4_t) __b);
1928 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1929 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1931 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1932 (int32x2_t) __b);
1935 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1936 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1938 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1941 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1942 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1944 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1947 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1948 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1950 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1953 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1954 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1956 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1957 (int8x16_t) __b);
1960 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1961 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1963 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1964 (int16x8_t) __b);
1967 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1968 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1970 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1971 (int32x4_t) __b);
1974 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1975 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1977 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
1980 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1981 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1983 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
1986 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1987 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1989 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
1992 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1993 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1995 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
1996 (int8x8_t) __b);
1999 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2000 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2002 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2003 (int16x4_t) __b);
2006 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2007 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2009 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2010 (int32x2_t) __b);
2013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2014 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2016 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2020 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2022 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2025 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2026 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2028 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2031 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2032 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2034 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2035 (int8x16_t) __b);
2038 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2039 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2041 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2042 (int16x8_t) __b);
2045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2046 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2048 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2049 (int32x4_t) __b);
2052 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2053 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2055 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2059 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2061 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2064 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2065 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2067 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2070 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2071 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2073 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2076 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2077 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2079 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2082 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2083 vhsub_s8 (int8x8_t __a, int8x8_t __b)
2085 return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b);
2088 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2089 vhsub_s16 (int16x4_t __a, int16x4_t __b)
2091 return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
2094 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2095 vhsub_s32 (int32x2_t __a, int32x2_t __b)
2097 return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
2100 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2101 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
2103 return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
2104 (int8x8_t) __b);
2107 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2108 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
2110 return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
2111 (int16x4_t) __b);
2114 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2115 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
2117 return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
2118 (int32x2_t) __b);
2121 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2122 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
2124 return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
2127 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2128 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
2130 return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
2133 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2134 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
2136 return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
2139 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2140 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2142 return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
2143 (int8x16_t) __b);
2146 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2147 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2149 return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
2150 (int16x8_t) __b);
2153 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2154 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2156 return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
2157 (int32x4_t) __b);
2160 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2161 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
2163 return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
2166 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2167 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
2169 return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
2172 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2173 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
2175 return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
2178 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2179 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2181 return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
2182 (int16x8_t) __b);
2185 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2186 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2188 return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
2189 (int32x4_t) __b);
2192 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2193 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2195 return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
2196 (int64x2_t) __b);
2199 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2200 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
2202 return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
2205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2206 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
2208 return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
2211 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2212 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
2214 return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
2217 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2218 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2220 return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
2221 (int16x8_t) __b);
2224 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2225 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2227 return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
2228 (int32x4_t) __b);
2231 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2232 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2234 return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
2235 (int64x2_t) __b);
2238 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2239 vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2241 return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
2244 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2245 vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2247 return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
2250 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2251 vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2253 return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
2256 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2257 vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2259 return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
2260 (int16x8_t) __b,
2261 (int16x8_t) __c);
2264 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2265 vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2267 return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
2268 (int32x4_t) __b,
2269 (int32x4_t) __c);
2272 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2273 vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2275 return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
2276 (int64x2_t) __b,
2277 (int64x2_t) __c);
2280 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2281 vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2283 return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
2286 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2287 vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2289 return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);;
2292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2293 vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2295 return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
2298 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2299 vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2301 return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
2302 (int16x8_t) __b,
2303 (int16x8_t) __c);
2306 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2307 vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2309 return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
2310 (int32x4_t) __b,
2311 (int32x4_t) __c);
2314 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2315 vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2317 return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
2318 (int64x2_t) __b,
2319 (int64x2_t) __c);
2322 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2323 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2325 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2328 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2329 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2331 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2334 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2335 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2337 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2340 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2341 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2343 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2346 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2347 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2349 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2352 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2353 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2355 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2358 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2359 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2361 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2364 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2365 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2367 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2370 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2371 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2373 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2376 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2377 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2379 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2382 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2383 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2385 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2388 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2389 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2391 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2394 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2395 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2397 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2400 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2401 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2403 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2407 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2409 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2412 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2413 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2415 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2418 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2419 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2421 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2424 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2425 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2427 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2430 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2431 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2433 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2436 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2437 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2439 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2442 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2443 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2445 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2448 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2449 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2451 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2454 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2455 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2457 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2460 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2461 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2463 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2466 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2467 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2469 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2472 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2473 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2475 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2478 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2479 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2481 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2484 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2485 vqneg_s8 (int8x8_t __a)
2487 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2490 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2491 vqneg_s16 (int16x4_t __a)
2493 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2496 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2497 vqneg_s32 (int32x2_t __a)
2499 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2502 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2503 vqneg_s64 (int64x1_t __a)
2505 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2508 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2509 vqnegq_s8 (int8x16_t __a)
2511 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2514 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2515 vqnegq_s16 (int16x8_t __a)
2517 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2520 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2521 vqnegq_s32 (int32x4_t __a)
2523 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2526 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2527 vqabs_s8 (int8x8_t __a)
2529 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2532 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2533 vqabs_s16 (int16x4_t __a)
2535 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2538 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2539 vqabs_s32 (int32x2_t __a)
2541 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2544 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2545 vqabs_s64 (int64x1_t __a)
2547 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2550 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2551 vqabsq_s8 (int8x16_t __a)
2553 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2556 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2557 vqabsq_s16 (int16x8_t __a)
2559 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2562 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2563 vqabsq_s32 (int32x4_t __a)
2565 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2568 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2569 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2571 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2574 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2575 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2577 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2580 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2581 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2583 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2586 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2587 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2589 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2592 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2593 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2595 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2598 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2599 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2601 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2604 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2605 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2607 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2610 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2611 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2613 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2617 vcreate_s8 (uint64_t __a)
2619 return (int8x8_t) __a;
2622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2623 vcreate_s16 (uint64_t __a)
2625 return (int16x4_t) __a;
2628 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2629 vcreate_s32 (uint64_t __a)
2631 return (int32x2_t) __a;
2634 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2635 vcreate_s64 (uint64_t __a)
2637 return (int64x1_t) {__a};
2640 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2641 vcreate_f32 (uint64_t __a)
2643 return (float32x2_t) __a;
2646 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2647 vcreate_u8 (uint64_t __a)
2649 return (uint8x8_t) __a;
2652 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2653 vcreate_u16 (uint64_t __a)
2655 return (uint16x4_t) __a;
2658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2659 vcreate_u32 (uint64_t __a)
2661 return (uint32x2_t) __a;
2664 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2665 vcreate_u64 (uint64_t __a)
2667 return (uint64x1_t) {__a};
2670 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2671 vcreate_f64 (uint64_t __a)
2673 return (float64x1_t) __a;
2676 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2677 vcreate_p8 (uint64_t __a)
2679 return (poly8x8_t) __a;
2682 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2683 vcreate_p16 (uint64_t __a)
2685 return (poly16x4_t) __a;
/* vget_lane */
2690 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2691 vget_lane_f32 (float32x2_t __a, const int __b)
2693 return __aarch64_vget_lane_any (__a, __b);
2696 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2697 vget_lane_f64 (float64x1_t __a, const int __b)
2699 return __aarch64_vget_lane_any (__a, __b);
2702 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2703 vget_lane_p8 (poly8x8_t __a, const int __b)
2705 return __aarch64_vget_lane_any (__a, __b);
2708 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2709 vget_lane_p16 (poly16x4_t __a, const int __b)
2711 return __aarch64_vget_lane_any (__a, __b);
2714 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2715 vget_lane_s8 (int8x8_t __a, const int __b)
2717 return __aarch64_vget_lane_any (__a, __b);
2720 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2721 vget_lane_s16 (int16x4_t __a, const int __b)
2723 return __aarch64_vget_lane_any (__a, __b);
2726 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2727 vget_lane_s32 (int32x2_t __a, const int __b)
2729 return __aarch64_vget_lane_any (__a, __b);
2732 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2733 vget_lane_s64 (int64x1_t __a, const int __b)
2735 return __aarch64_vget_lane_any (__a, __b);
2738 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2739 vget_lane_u8 (uint8x8_t __a, const int __b)
2741 return __aarch64_vget_lane_any (__a, __b);
2744 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2745 vget_lane_u16 (uint16x4_t __a, const int __b)
2747 return __aarch64_vget_lane_any (__a, __b);
2750 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2751 vget_lane_u32 (uint32x2_t __a, const int __b)
2753 return __aarch64_vget_lane_any (__a, __b);
2756 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2757 vget_lane_u64 (uint64x1_t __a, const int __b)
2759 return __aarch64_vget_lane_any (__a, __b);
/* vgetq_lane */
2764 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2765 vgetq_lane_f32 (float32x4_t __a, const int __b)
2767 return __aarch64_vget_lane_any (__a, __b);
2770 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2771 vgetq_lane_f64 (float64x2_t __a, const int __b)
2773 return __aarch64_vget_lane_any (__a, __b);
2776 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2777 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2779 return __aarch64_vget_lane_any (__a, __b);
2782 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2783 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2785 return __aarch64_vget_lane_any (__a, __b);
2788 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2789 vgetq_lane_s8 (int8x16_t __a, const int __b)
2791 return __aarch64_vget_lane_any (__a, __b);
2794 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2795 vgetq_lane_s16 (int16x8_t __a, const int __b)
2797 return __aarch64_vget_lane_any (__a, __b);
2800 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2801 vgetq_lane_s32 (int32x4_t __a, const int __b)
2803 return __aarch64_vget_lane_any (__a, __b);
2806 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2807 vgetq_lane_s64 (int64x2_t __a, const int __b)
2809 return __aarch64_vget_lane_any (__a, __b);
2812 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2813 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2815 return __aarch64_vget_lane_any (__a, __b);
2818 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2819 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2821 return __aarch64_vget_lane_any (__a, __b);
2824 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2825 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2827 return __aarch64_vget_lane_any (__a, __b);
2830 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2831 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2833 return __aarch64_vget_lane_any (__a, __b);
/* vreinterpret */
2838 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2839 vreinterpret_p8_f64 (float64x1_t __a)
2841 return (poly8x8_t) __a;
2844 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2845 vreinterpret_p8_s8 (int8x8_t __a)
2847 return (poly8x8_t) __a;
2850 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2851 vreinterpret_p8_s16 (int16x4_t __a)
2853 return (poly8x8_t) __a;
2856 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2857 vreinterpret_p8_s32 (int32x2_t __a)
2859 return (poly8x8_t) __a;
2862 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2863 vreinterpret_p8_s64 (int64x1_t __a)
2865 return (poly8x8_t) __a;
2868 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2869 vreinterpret_p8_f32 (float32x2_t __a)
2871 return (poly8x8_t) __a;
2874 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2875 vreinterpret_p8_u8 (uint8x8_t __a)
2877 return (poly8x8_t) __a;
2880 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2881 vreinterpret_p8_u16 (uint16x4_t __a)
2883 return (poly8x8_t) __a;
2886 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2887 vreinterpret_p8_u32 (uint32x2_t __a)
2889 return (poly8x8_t) __a;
2892 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2893 vreinterpret_p8_u64 (uint64x1_t __a)
2895 return (poly8x8_t) __a;
2898 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2899 vreinterpret_p8_p16 (poly16x4_t __a)
2901 return (poly8x8_t) __a;
2904 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2905 vreinterpretq_p8_f64 (float64x2_t __a)
2907 return (poly8x16_t) __a;
2910 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2911 vreinterpretq_p8_s8 (int8x16_t __a)
2913 return (poly8x16_t) __a;
2916 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2917 vreinterpretq_p8_s16 (int16x8_t __a)
2919 return (poly8x16_t) __a;
2922 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2923 vreinterpretq_p8_s32 (int32x4_t __a)
2925 return (poly8x16_t) __a;
2928 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2929 vreinterpretq_p8_s64 (int64x2_t __a)
2931 return (poly8x16_t) __a;
2934 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2935 vreinterpretq_p8_f32 (float32x4_t __a)
2937 return (poly8x16_t) __a;
2940 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2941 vreinterpretq_p8_u8 (uint8x16_t __a)
2943 return (poly8x16_t) __a;
2946 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2947 vreinterpretq_p8_u16 (uint16x8_t __a)
2949 return (poly8x16_t) __a;
2952 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2953 vreinterpretq_p8_u32 (uint32x4_t __a)
2955 return (poly8x16_t) __a;
2958 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2959 vreinterpretq_p8_u64 (uint64x2_t __a)
2961 return (poly8x16_t) __a;
2964 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2965 vreinterpretq_p8_p16 (poly16x8_t __a)
2967 return (poly8x16_t) __a;
2970 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2971 vreinterpret_p16_f64 (float64x1_t __a)
2973 return (poly16x4_t) __a;
2976 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2977 vreinterpret_p16_s8 (int8x8_t __a)
2979 return (poly16x4_t) __a;
2982 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2983 vreinterpret_p16_s16 (int16x4_t __a)
2985 return (poly16x4_t) __a;
2988 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2989 vreinterpret_p16_s32 (int32x2_t __a)
2991 return (poly16x4_t) __a;
2994 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2995 vreinterpret_p16_s64 (int64x1_t __a)
2997 return (poly16x4_t) __a;
3000 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3001 vreinterpret_p16_f32 (float32x2_t __a)
3003 return (poly16x4_t) __a;
3006 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3007 vreinterpret_p16_u8 (uint8x8_t __a)
3009 return (poly16x4_t) __a;
3012 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3013 vreinterpret_p16_u16 (uint16x4_t __a)
3015 return (poly16x4_t) __a;
3018 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3019 vreinterpret_p16_u32 (uint32x2_t __a)
3021 return (poly16x4_t) __a;
3024 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3025 vreinterpret_p16_u64 (uint64x1_t __a)
3027 return (poly16x4_t) __a;
3030 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3031 vreinterpret_p16_p8 (poly8x8_t __a)
3033 return (poly16x4_t) __a;
3036 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3037 vreinterpretq_p16_f64 (float64x2_t __a)
3039 return (poly16x8_t) __a;
3042 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3043 vreinterpretq_p16_s8 (int8x16_t __a)
3045 return (poly16x8_t) __a;
3048 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3049 vreinterpretq_p16_s16 (int16x8_t __a)
3051 return (poly16x8_t) __a;
3054 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3055 vreinterpretq_p16_s32 (int32x4_t __a)
3057 return (poly16x8_t) __a;
3060 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3061 vreinterpretq_p16_s64 (int64x2_t __a)
3063 return (poly16x8_t) __a;
3066 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3067 vreinterpretq_p16_f32 (float32x4_t __a)
3069 return (poly16x8_t) __a;
3072 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3073 vreinterpretq_p16_u8 (uint8x16_t __a)
3075 return (poly16x8_t) __a;
3078 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3079 vreinterpretq_p16_u16 (uint16x8_t __a)
3081 return (poly16x8_t) __a;
3084 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3085 vreinterpretq_p16_u32 (uint32x4_t __a)
3087 return (poly16x8_t) __a;
3090 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3091 vreinterpretq_p16_u64 (uint64x2_t __a)
3093 return (poly16x8_t) __a;
3096 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3097 vreinterpretq_p16_p8 (poly8x16_t __a)
3099 return (poly16x8_t) __a;
3102 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3103 vreinterpret_f32_f64 (float64x1_t __a)
3105 return (float32x2_t) __a;
3108 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3109 vreinterpret_f32_s8 (int8x8_t __a)
3111 return (float32x2_t) __a;
3114 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3115 vreinterpret_f32_s16 (int16x4_t __a)
3117 return (float32x2_t) __a;
3120 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3121 vreinterpret_f32_s32 (int32x2_t __a)
3123 return (float32x2_t) __a;
3126 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3127 vreinterpret_f32_s64 (int64x1_t __a)
3129 return (float32x2_t) __a;
3132 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3133 vreinterpret_f32_u8 (uint8x8_t __a)
3135 return (float32x2_t) __a;
3138 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3139 vreinterpret_f32_u16 (uint16x4_t __a)
3141 return (float32x2_t) __a;
3144 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3145 vreinterpret_f32_u32 (uint32x2_t __a)
3147 return (float32x2_t) __a;
3150 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3151 vreinterpret_f32_u64 (uint64x1_t __a)
3153 return (float32x2_t) __a;
3156 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3157 vreinterpret_f32_p8 (poly8x8_t __a)
3159 return (float32x2_t) __a;
3162 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3163 vreinterpret_f32_p16 (poly16x4_t __a)
3165 return (float32x2_t) __a;
3168 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3169 vreinterpretq_f32_f64 (float64x2_t __a)
3171 return (float32x4_t) __a;
3174 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3175 vreinterpretq_f32_s8 (int8x16_t __a)
3177 return (float32x4_t) __a;
3180 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3181 vreinterpretq_f32_s16 (int16x8_t __a)
3183 return (float32x4_t) __a;
3186 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3187 vreinterpretq_f32_s32 (int32x4_t __a)
3189 return (float32x4_t) __a;
3192 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3193 vreinterpretq_f32_s64 (int64x2_t __a)
3195 return (float32x4_t) __a;
3198 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3199 vreinterpretq_f32_u8 (uint8x16_t __a)
3201 return (float32x4_t) __a;
3204 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3205 vreinterpretq_f32_u16 (uint16x8_t __a)
3207 return (float32x4_t) __a;
3210 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3211 vreinterpretq_f32_u32 (uint32x4_t __a)
3213 return (float32x4_t) __a;
3216 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3217 vreinterpretq_f32_u64 (uint64x2_t __a)
3219 return (float32x4_t) __a;
3222 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3223 vreinterpretq_f32_p8 (poly8x16_t __a)
3225 return (float32x4_t) __a;
3228 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3229 vreinterpretq_f32_p16 (poly16x8_t __a)
3231 return (float32x4_t) __a;
3234 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3235 vreinterpret_f64_f32 (float32x2_t __a)
3237 return (float64x1_t) __a;
3240 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3241 vreinterpret_f64_p8 (poly8x8_t __a)
3243 return (float64x1_t) __a;
3246 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3247 vreinterpret_f64_p16 (poly16x4_t __a)
3249 return (float64x1_t) __a;
3252 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3253 vreinterpret_f64_s8 (int8x8_t __a)
3255 return (float64x1_t) __a;
3258 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3259 vreinterpret_f64_s16 (int16x4_t __a)
3261 return (float64x1_t) __a;
3264 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3265 vreinterpret_f64_s32 (int32x2_t __a)
3267 return (float64x1_t) __a;
3270 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3271 vreinterpret_f64_s64 (int64x1_t __a)
3273 return (float64x1_t) __a;
3276 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3277 vreinterpret_f64_u8 (uint8x8_t __a)
3279 return (float64x1_t) __a;
3282 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3283 vreinterpret_f64_u16 (uint16x4_t __a)
3285 return (float64x1_t) __a;
3288 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3289 vreinterpret_f64_u32 (uint32x2_t __a)
3291 return (float64x1_t) __a;
3294 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3295 vreinterpret_f64_u64 (uint64x1_t __a)
3297 return (float64x1_t) __a;
3300 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3301 vreinterpretq_f64_f32 (float32x4_t __a)
3303 return (float64x2_t) __a;
3306 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3307 vreinterpretq_f64_p8 (poly8x16_t __a)
3309 return (float64x2_t) __a;
3312 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3313 vreinterpretq_f64_p16 (poly16x8_t __a)
3315 return (float64x2_t) __a;
3318 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3319 vreinterpretq_f64_s8 (int8x16_t __a)
3321 return (float64x2_t) __a;
3324 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3325 vreinterpretq_f64_s16 (int16x8_t __a)
3327 return (float64x2_t) __a;
3330 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3331 vreinterpretq_f64_s32 (int32x4_t __a)
3333 return (float64x2_t) __a;
3336 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3337 vreinterpretq_f64_s64 (int64x2_t __a)
3339 return (float64x2_t) __a;
3342 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3343 vreinterpretq_f64_u8 (uint8x16_t __a)
3345 return (float64x2_t) __a;
3348 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3349 vreinterpretq_f64_u16 (uint16x8_t __a)
3351 return (float64x2_t) __a;
3354 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3355 vreinterpretq_f64_u32 (uint32x4_t __a)
3357 return (float64x2_t) __a;
3360 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3361 vreinterpretq_f64_u64 (uint64x2_t __a)
3363 return (float64x2_t) __a;
3366 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3367 vreinterpret_s64_f64 (float64x1_t __a)
3369 return (int64x1_t) __a;
3372 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3373 vreinterpret_s64_s8 (int8x8_t __a)
3375 return (int64x1_t) __a;
3378 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3379 vreinterpret_s64_s16 (int16x4_t __a)
3381 return (int64x1_t) __a;
3384 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3385 vreinterpret_s64_s32 (int32x2_t __a)
3387 return (int64x1_t) __a;
3390 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3391 vreinterpret_s64_f32 (float32x2_t __a)
3393 return (int64x1_t) __a;
3396 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3397 vreinterpret_s64_u8 (uint8x8_t __a)
3399 return (int64x1_t) __a;
3402 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3403 vreinterpret_s64_u16 (uint16x4_t __a)
3405 return (int64x1_t) __a;
3408 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3409 vreinterpret_s64_u32 (uint32x2_t __a)
3411 return (int64x1_t) __a;
3414 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3415 vreinterpret_s64_u64 (uint64x1_t __a)
3417 return (int64x1_t) __a;
3420 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3421 vreinterpret_s64_p8 (poly8x8_t __a)
3423 return (int64x1_t) __a;
3426 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3427 vreinterpret_s64_p16 (poly16x4_t __a)
3429 return (int64x1_t) __a;
3432 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3433 vreinterpretq_s64_f64 (float64x2_t __a)
3435 return (int64x2_t) __a;
3438 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3439 vreinterpretq_s64_s8 (int8x16_t __a)
3441 return (int64x2_t) __a;
3444 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3445 vreinterpretq_s64_s16 (int16x8_t __a)
3447 return (int64x2_t) __a;
3450 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3451 vreinterpretq_s64_s32 (int32x4_t __a)
3453 return (int64x2_t) __a;
3456 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3457 vreinterpretq_s64_f32 (float32x4_t __a)
3459 return (int64x2_t) __a;
3462 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3463 vreinterpretq_s64_u8 (uint8x16_t __a)
3465 return (int64x2_t) __a;
3468 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3469 vreinterpretq_s64_u16 (uint16x8_t __a)
3471 return (int64x2_t) __a;
3474 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3475 vreinterpretq_s64_u32 (uint32x4_t __a)
3477 return (int64x2_t) __a;
3480 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3481 vreinterpretq_s64_u64 (uint64x2_t __a)
3483 return (int64x2_t) __a;
3486 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3487 vreinterpretq_s64_p8 (poly8x16_t __a)
3489 return (int64x2_t) __a;
3492 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3493 vreinterpretq_s64_p16 (poly16x8_t __a)
3495 return (int64x2_t) __a;
3498 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3499 vreinterpret_u64_f64 (float64x1_t __a)
3501 return (uint64x1_t) __a;
3504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3505 vreinterpret_u64_s8 (int8x8_t __a)
3507 return (uint64x1_t) __a;
3510 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3511 vreinterpret_u64_s16 (int16x4_t __a)
3513 return (uint64x1_t) __a;
3516 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3517 vreinterpret_u64_s32 (int32x2_t __a)
3519 return (uint64x1_t) __a;
3522 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3523 vreinterpret_u64_s64 (int64x1_t __a)
3525 return (uint64x1_t) __a;
3528 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3529 vreinterpret_u64_f32 (float32x2_t __a)
3531 return (uint64x1_t) __a;
3534 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3535 vreinterpret_u64_u8 (uint8x8_t __a)
3537 return (uint64x1_t) __a;
3540 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3541 vreinterpret_u64_u16 (uint16x4_t __a)
3543 return (uint64x1_t) __a;
3546 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3547 vreinterpret_u64_u32 (uint32x2_t __a)
3549 return (uint64x1_t) __a;
3552 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3553 vreinterpret_u64_p8 (poly8x8_t __a)
3555 return (uint64x1_t) __a;
3558 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3559 vreinterpret_u64_p16 (poly16x4_t __a)
3561 return (uint64x1_t) __a;
3564 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3565 vreinterpretq_u64_f64 (float64x2_t __a)
3567 return (uint64x2_t) __a;
3570 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3571 vreinterpretq_u64_s8 (int8x16_t __a)
3573 return (uint64x2_t) __a;
3576 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3577 vreinterpretq_u64_s16 (int16x8_t __a)
3579 return (uint64x2_t) __a;
3582 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3583 vreinterpretq_u64_s32 (int32x4_t __a)
3585 return (uint64x2_t) __a;
3588 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3589 vreinterpretq_u64_s64 (int64x2_t __a)
3591 return (uint64x2_t) __a;
3594 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3595 vreinterpretq_u64_f32 (float32x4_t __a)
3597 return (uint64x2_t) __a;
3600 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3601 vreinterpretq_u64_u8 (uint8x16_t __a)
3603 return (uint64x2_t) __a;
3606 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3607 vreinterpretq_u64_u16 (uint16x8_t __a)
3609 return (uint64x2_t) __a;
3612 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3613 vreinterpretq_u64_u32 (uint32x4_t __a)
3615 return (uint64x2_t) __a;
3618 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3619 vreinterpretq_u64_p8 (poly8x16_t __a)
3621 return (uint64x2_t) __a;
3624 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3625 vreinterpretq_u64_p16 (poly16x8_t __a)
3627 return (uint64x2_t) __a;
3630 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3631 vreinterpret_s8_f64 (float64x1_t __a)
3633 return (int8x8_t) __a;
3636 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3637 vreinterpret_s8_s16 (int16x4_t __a)
3639 return (int8x8_t) __a;
3642 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3643 vreinterpret_s8_s32 (int32x2_t __a)
3645 return (int8x8_t) __a;
3648 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3649 vreinterpret_s8_s64 (int64x1_t __a)
3651 return (int8x8_t) __a;
3654 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3655 vreinterpret_s8_f32 (float32x2_t __a)
3657 return (int8x8_t) __a;
3660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3661 vreinterpret_s8_u8 (uint8x8_t __a)
3663 return (int8x8_t) __a;
3666 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3667 vreinterpret_s8_u16 (uint16x4_t __a)
3669 return (int8x8_t) __a;
3672 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3673 vreinterpret_s8_u32 (uint32x2_t __a)
3675 return (int8x8_t) __a;
3678 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3679 vreinterpret_s8_u64 (uint64x1_t __a)
3681 return (int8x8_t) __a;
3684 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3685 vreinterpret_s8_p8 (poly8x8_t __a)
3687 return (int8x8_t) __a;
3690 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3691 vreinterpret_s8_p16 (poly16x4_t __a)
3693 return (int8x8_t) __a;
3696 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3697 vreinterpretq_s8_f64 (float64x2_t __a)
3699 return (int8x16_t) __a;
3702 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3703 vreinterpretq_s8_s16 (int16x8_t __a)
3705 return (int8x16_t) __a;
3708 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3709 vreinterpretq_s8_s32 (int32x4_t __a)
3711 return (int8x16_t) __a;
3714 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3715 vreinterpretq_s8_s64 (int64x2_t __a)
3717 return (int8x16_t) __a;
3720 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3721 vreinterpretq_s8_f32 (float32x4_t __a)
3723 return (int8x16_t) __a;
3726 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3727 vreinterpretq_s8_u8 (uint8x16_t __a)
3729 return (int8x16_t) __a;
3732 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3733 vreinterpretq_s8_u16 (uint16x8_t __a)
3735 return (int8x16_t) __a;
3738 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3739 vreinterpretq_s8_u32 (uint32x4_t __a)
3741 return (int8x16_t) __a;
3744 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3745 vreinterpretq_s8_u64 (uint64x2_t __a)
3747 return (int8x16_t) __a;
3750 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3751 vreinterpretq_s8_p8 (poly8x16_t __a)
3753 return (int8x16_t) __a;
3756 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3757 vreinterpretq_s8_p16 (poly16x8_t __a)
3759 return (int8x16_t) __a;
3762 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3763 vreinterpret_s16_f64 (float64x1_t __a)
3765 return (int16x4_t) __a;
3768 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3769 vreinterpret_s16_s8 (int8x8_t __a)
3771 return (int16x4_t) __a;
3774 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3775 vreinterpret_s16_s32 (int32x2_t __a)
3777 return (int16x4_t) __a;
3780 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3781 vreinterpret_s16_s64 (int64x1_t __a)
3783 return (int16x4_t) __a;
3786 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3787 vreinterpret_s16_f32 (float32x2_t __a)
3789 return (int16x4_t) __a;
3792 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3793 vreinterpret_s16_u8 (uint8x8_t __a)
3795 return (int16x4_t) __a;
3798 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3799 vreinterpret_s16_u16 (uint16x4_t __a)
3801 return (int16x4_t) __a;
3804 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3805 vreinterpret_s16_u32 (uint32x2_t __a)
3807 return (int16x4_t) __a;
3810 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3811 vreinterpret_s16_u64 (uint64x1_t __a)
3813 return (int16x4_t) __a;
3816 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3817 vreinterpret_s16_p8 (poly8x8_t __a)
3819 return (int16x4_t) __a;
3822 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3823 vreinterpret_s16_p16 (poly16x4_t __a)
3825 return (int16x4_t) __a;
3828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3829 vreinterpretq_s16_f64 (float64x2_t __a)
3831 return (int16x8_t) __a;
3834 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3835 vreinterpretq_s16_s8 (int8x16_t __a)
3837 return (int16x8_t) __a;
3840 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3841 vreinterpretq_s16_s32 (int32x4_t __a)
3843 return (int16x8_t) __a;
3846 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3847 vreinterpretq_s16_s64 (int64x2_t __a)
3849 return (int16x8_t) __a;
3852 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3853 vreinterpretq_s16_f32 (float32x4_t __a)
3855 return (int16x8_t) __a;
3858 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3859 vreinterpretq_s16_u8 (uint8x16_t __a)
3861 return (int16x8_t) __a;
3864 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3865 vreinterpretq_s16_u16 (uint16x8_t __a)
3867 return (int16x8_t) __a;
3870 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3871 vreinterpretq_s16_u32 (uint32x4_t __a)
3873 return (int16x8_t) __a;
3876 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3877 vreinterpretq_s16_u64 (uint64x2_t __a)
3879 return (int16x8_t) __a;
3882 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3883 vreinterpretq_s16_p8 (poly8x16_t __a)
3885 return (int16x8_t) __a;
3888 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3889 vreinterpretq_s16_p16 (poly16x8_t __a)
3891 return (int16x8_t) __a;
3894 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3895 vreinterpret_s32_f64 (float64x1_t __a)
3897 return (int32x2_t) __a;
3900 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3901 vreinterpret_s32_s8 (int8x8_t __a)
3903 return (int32x2_t) __a;
3906 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3907 vreinterpret_s32_s16 (int16x4_t __a)
3909 return (int32x2_t) __a;
3912 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3913 vreinterpret_s32_s64 (int64x1_t __a)
3915 return (int32x2_t) __a;
3918 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3919 vreinterpret_s32_f32 (float32x2_t __a)
3921 return (int32x2_t) __a;
3924 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3925 vreinterpret_s32_u8 (uint8x8_t __a)
3927 return (int32x2_t) __a;
3930 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3931 vreinterpret_s32_u16 (uint16x4_t __a)
3933 return (int32x2_t) __a;
3936 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3937 vreinterpret_s32_u32 (uint32x2_t __a)
3939 return (int32x2_t) __a;
3942 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3943 vreinterpret_s32_u64 (uint64x1_t __a)
3945 return (int32x2_t) __a;
3948 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3949 vreinterpret_s32_p8 (poly8x8_t __a)
3951 return (int32x2_t) __a;
3954 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3955 vreinterpret_s32_p16 (poly16x4_t __a)
3957 return (int32x2_t) __a;
3960 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3961 vreinterpretq_s32_f64 (float64x2_t __a)
3963 return (int32x4_t) __a;
3966 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3967 vreinterpretq_s32_s8 (int8x16_t __a)
3969 return (int32x4_t) __a;
3972 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3973 vreinterpretq_s32_s16 (int16x8_t __a)
3975 return (int32x4_t) __a;
3978 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3979 vreinterpretq_s32_s64 (int64x2_t __a)
3981 return (int32x4_t) __a;
3984 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3985 vreinterpretq_s32_f32 (float32x4_t __a)
3987 return (int32x4_t) __a;
3990 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3991 vreinterpretq_s32_u8 (uint8x16_t __a)
3993 return (int32x4_t) __a;
3996 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3997 vreinterpretq_s32_u16 (uint16x8_t __a)
3999 return (int32x4_t) __a;
4002 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4003 vreinterpretq_s32_u32 (uint32x4_t __a)
4005 return (int32x4_t) __a;
4008 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4009 vreinterpretq_s32_u64 (uint64x2_t __a)
4011 return (int32x4_t) __a;
4014 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4015 vreinterpretq_s32_p8 (poly8x16_t __a)
4017 return (int32x4_t) __a;
4020 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4021 vreinterpretq_s32_p16 (poly16x8_t __a)
4023 return (int32x4_t) __a;
4026 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4027 vreinterpret_u8_f64 (float64x1_t __a)
4029 return (uint8x8_t) __a;
4032 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4033 vreinterpret_u8_s8 (int8x8_t __a)
4035 return (uint8x8_t) __a;
4038 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4039 vreinterpret_u8_s16 (int16x4_t __a)
4041 return (uint8x8_t) __a;
4044 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4045 vreinterpret_u8_s32 (int32x2_t __a)
4047 return (uint8x8_t) __a;
4050 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4051 vreinterpret_u8_s64 (int64x1_t __a)
4053 return (uint8x8_t) __a;
4056 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4057 vreinterpret_u8_f32 (float32x2_t __a)
4059 return (uint8x8_t) __a;
4062 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4063 vreinterpret_u8_u16 (uint16x4_t __a)
4065 return (uint8x8_t) __a;
4068 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4069 vreinterpret_u8_u32 (uint32x2_t __a)
4071 return (uint8x8_t) __a;
4074 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4075 vreinterpret_u8_u64 (uint64x1_t __a)
4077 return (uint8x8_t) __a;
4080 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4081 vreinterpret_u8_p8 (poly8x8_t __a)
4083 return (uint8x8_t) __a;
4086 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4087 vreinterpret_u8_p16 (poly16x4_t __a)
4089 return (uint8x8_t) __a;
4092 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4093 vreinterpretq_u8_f64 (float64x2_t __a)
4095 return (uint8x16_t) __a;
4098 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4099 vreinterpretq_u8_s8 (int8x16_t __a)
4101 return (uint8x16_t) __a;
4104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4105 vreinterpretq_u8_s16 (int16x8_t __a)
4107 return (uint8x16_t) __a;
4110 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4111 vreinterpretq_u8_s32 (int32x4_t __a)
4113 return (uint8x16_t) __a;
4116 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4117 vreinterpretq_u8_s64 (int64x2_t __a)
4119 return (uint8x16_t) __a;
4122 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4123 vreinterpretq_u8_f32 (float32x4_t __a)
4125 return (uint8x16_t) __a;
4128 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4129 vreinterpretq_u8_u16 (uint16x8_t __a)
4131 return (uint8x16_t) __a;
4134 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4135 vreinterpretq_u8_u32 (uint32x4_t __a)
4137 return (uint8x16_t) __a;
4140 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4141 vreinterpretq_u8_u64 (uint64x2_t __a)
4143 return (uint8x16_t) __a;
4146 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4147 vreinterpretq_u8_p8 (poly8x16_t __a)
4149 return (uint8x16_t) __a;
4152 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4153 vreinterpretq_u8_p16 (poly16x8_t __a)
4155 return (uint8x16_t) __a;
4158 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4159 vreinterpret_u16_f64 (float64x1_t __a)
4161 return (uint16x4_t) __a;
4164 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4165 vreinterpret_u16_s8 (int8x8_t __a)
4167 return (uint16x4_t) __a;
4170 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4171 vreinterpret_u16_s16 (int16x4_t __a)
4173 return (uint16x4_t) __a;
4176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4177 vreinterpret_u16_s32 (int32x2_t __a)
4179 return (uint16x4_t) __a;
4182 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4183 vreinterpret_u16_s64 (int64x1_t __a)
4185 return (uint16x4_t) __a;
4188 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4189 vreinterpret_u16_f32 (float32x2_t __a)
4191 return (uint16x4_t) __a;
4194 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4195 vreinterpret_u16_u8 (uint8x8_t __a)
4197 return (uint16x4_t) __a;
4200 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4201 vreinterpret_u16_u32 (uint32x2_t __a)
4203 return (uint16x4_t) __a;
4206 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4207 vreinterpret_u16_u64 (uint64x1_t __a)
4209 return (uint16x4_t) __a;
4212 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4213 vreinterpret_u16_p8 (poly8x8_t __a)
4215 return (uint16x4_t) __a;
4218 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4219 vreinterpret_u16_p16 (poly16x4_t __a)
4221 return (uint16x4_t) __a;
4224 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4225 vreinterpretq_u16_f64 (float64x2_t __a)
4227 return (uint16x8_t) __a;
4230 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4231 vreinterpretq_u16_s8 (int8x16_t __a)
4233 return (uint16x8_t) __a;
4236 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4237 vreinterpretq_u16_s16 (int16x8_t __a)
4239 return (uint16x8_t) __a;
4242 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4243 vreinterpretq_u16_s32 (int32x4_t __a)
4245 return (uint16x8_t) __a;
4248 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4249 vreinterpretq_u16_s64 (int64x2_t __a)
4251 return (uint16x8_t) __a;
4254 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4255 vreinterpretq_u16_f32 (float32x4_t __a)
4257 return (uint16x8_t) __a;
4260 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4261 vreinterpretq_u16_u8 (uint8x16_t __a)
4263 return (uint16x8_t) __a;
4266 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4267 vreinterpretq_u16_u32 (uint32x4_t __a)
4269 return (uint16x8_t) __a;
4272 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4273 vreinterpretq_u16_u64 (uint64x2_t __a)
4275 return (uint16x8_t) __a;
4278 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4279 vreinterpretq_u16_p8 (poly8x16_t __a)
4281 return (uint16x8_t) __a;
4284 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4285 vreinterpretq_u16_p16 (poly16x8_t __a)
4287 return (uint16x8_t) __a;
4290 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4291 vreinterpret_u32_f64 (float64x1_t __a)
4293 return (uint32x2_t) __a;
4296 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4297 vreinterpret_u32_s8 (int8x8_t __a)
4299 return (uint32x2_t) __a;
4302 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4303 vreinterpret_u32_s16 (int16x4_t __a)
4305 return (uint32x2_t) __a;
4308 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4309 vreinterpret_u32_s32 (int32x2_t __a)
4311 return (uint32x2_t) __a;
4314 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4315 vreinterpret_u32_s64 (int64x1_t __a)
4317 return (uint32x2_t) __a;
4320 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4321 vreinterpret_u32_f32 (float32x2_t __a)
4323 return (uint32x2_t) __a;
4326 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4327 vreinterpret_u32_u8 (uint8x8_t __a)
4329 return (uint32x2_t) __a;
4332 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4333 vreinterpret_u32_u16 (uint16x4_t __a)
4335 return (uint32x2_t) __a;
4338 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4339 vreinterpret_u32_u64 (uint64x1_t __a)
4341 return (uint32x2_t) __a;
4344 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4345 vreinterpret_u32_p8 (poly8x8_t __a)
4347 return (uint32x2_t) __a;
4350 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4351 vreinterpret_u32_p16 (poly16x4_t __a)
4353 return (uint32x2_t) __a;
4356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4357 vreinterpretq_u32_f64 (float64x2_t __a)
4359 return (uint32x4_t) __a;
4362 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4363 vreinterpretq_u32_s8 (int8x16_t __a)
4365 return (uint32x4_t) __a;
4368 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4369 vreinterpretq_u32_s16 (int16x8_t __a)
4371 return (uint32x4_t) __a;
4374 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4375 vreinterpretq_u32_s32 (int32x4_t __a)
4377 return (uint32x4_t) __a;
4380 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4381 vreinterpretq_u32_s64 (int64x2_t __a)
4383 return (uint32x4_t) __a;
4386 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4387 vreinterpretq_u32_f32 (float32x4_t __a)
4389 return (uint32x4_t) __a;
4392 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4393 vreinterpretq_u32_u8 (uint8x16_t __a)
4395 return (uint32x4_t) __a;
4398 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4399 vreinterpretq_u32_u16 (uint16x8_t __a)
4401 return (uint32x4_t) __a;
4404 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4405 vreinterpretq_u32_u64 (uint64x2_t __a)
4407 return (uint32x4_t) __a;
4410 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4411 vreinterpretq_u32_p8 (poly8x16_t __a)
4413 return (uint32x4_t) __a;
4416 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4417 vreinterpretq_u32_p16 (poly16x8_t __a)
4419 return (uint32x4_t) __a;
4422 /* vset_lane */
4424 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4425 vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
4427 return __aarch64_vset_lane_any (__elem, __vec, __index);
4430 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4431 vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
4433 return __aarch64_vset_lane_any (__elem, __vec, __index);
4436 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4437 vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
4439 return __aarch64_vset_lane_any (__elem, __vec, __index);
4442 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4443 vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
4445 return __aarch64_vset_lane_any (__elem, __vec, __index);
4448 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4449 vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
4451 return __aarch64_vset_lane_any (__elem, __vec, __index);
4454 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4455 vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
4457 return __aarch64_vset_lane_any (__elem, __vec, __index);
4460 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4461 vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
4463 return __aarch64_vset_lane_any (__elem, __vec, __index);
4466 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4467 vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
4469 return __aarch64_vset_lane_any (__elem, __vec, __index);
4472 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4473 vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
4475 return __aarch64_vset_lane_any (__elem, __vec, __index);
4478 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4479 vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
4481 return __aarch64_vset_lane_any (__elem, __vec, __index);
4484 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4485 vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
4487 return __aarch64_vset_lane_any (__elem, __vec, __index);
4490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4491 vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
4493 return __aarch64_vset_lane_any (__elem, __vec, __index);
4496 /* vsetq_lane */
4498 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4499 vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
4501 return __aarch64_vset_lane_any (__elem, __vec, __index);
4504 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4505 vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
4507 return __aarch64_vset_lane_any (__elem, __vec, __index);
4510 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4511 vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
4513 return __aarch64_vset_lane_any (__elem, __vec, __index);
4516 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4517 vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
4519 return __aarch64_vset_lane_any (__elem, __vec, __index);
4522 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4523 vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
4525 return __aarch64_vset_lane_any (__elem, __vec, __index);
4528 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4529 vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
4531 return __aarch64_vset_lane_any (__elem, __vec, __index);
4534 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4535 vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
4537 return __aarch64_vset_lane_any (__elem, __vec, __index);
4540 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4541 vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
4543 return __aarch64_vset_lane_any (__elem, __vec, __index);
4546 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4547 vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
4549 return __aarch64_vset_lane_any (__elem, __vec, __index);
4552 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4553 vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
4555 return __aarch64_vset_lane_any (__elem, __vec, __index);
4558 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4559 vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
4561 return __aarch64_vset_lane_any (__elem, __vec, __index);
4564 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4565 vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
4567 return __aarch64_vset_lane_any (__elem, __vec, __index);
4570 #define __GET_LOW(__TYPE) \
4571 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4572 uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \
4573 return vreinterpret_##__TYPE##_u64 (lo);
4575 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4576 vget_low_f32 (float32x4_t __a)
4578 __GET_LOW (f32);
4581 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4582 vget_low_f64 (float64x2_t __a)
4584 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4587 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4588 vget_low_p8 (poly8x16_t __a)
4590 __GET_LOW (p8);
4593 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4594 vget_low_p16 (poly16x8_t __a)
4596 __GET_LOW (p16);
4599 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4600 vget_low_s8 (int8x16_t __a)
4602 __GET_LOW (s8);
4605 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4606 vget_low_s16 (int16x8_t __a)
4608 __GET_LOW (s16);
4611 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4612 vget_low_s32 (int32x4_t __a)
4614 __GET_LOW (s32);
4617 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4618 vget_low_s64 (int64x2_t __a)
4620 __GET_LOW (s64);
4623 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4624 vget_low_u8 (uint8x16_t __a)
4626 __GET_LOW (u8);
4629 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4630 vget_low_u16 (uint16x8_t __a)
4632 __GET_LOW (u16);
4635 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4636 vget_low_u32 (uint32x4_t __a)
4638 __GET_LOW (u32);
4641 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4642 vget_low_u64 (uint64x2_t __a)
4644 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
4647 #undef __GET_LOW
4649 #define __GET_HIGH(__TYPE) \
4650 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4651 uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
4652 return vreinterpret_##__TYPE##_u64 (hi);
4654 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4655 vget_high_f32 (float32x4_t __a)
4657 __GET_HIGH (f32);
4660 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4661 vget_high_f64 (float64x2_t __a)
4663 __GET_HIGH (f64);
4666 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4667 vget_high_p8 (poly8x16_t __a)
4669 __GET_HIGH (p8);
4672 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4673 vget_high_p16 (poly16x8_t __a)
4675 __GET_HIGH (p16);
4678 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4679 vget_high_s8 (int8x16_t __a)
4681 __GET_HIGH (s8);
4684 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4685 vget_high_s16 (int16x8_t __a)
4687 __GET_HIGH (s16);
4690 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4691 vget_high_s32 (int32x4_t __a)
4693 __GET_HIGH (s32);
4696 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4697 vget_high_s64 (int64x2_t __a)
4699 __GET_HIGH (s64);
4702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4703 vget_high_u8 (uint8x16_t __a)
4705 __GET_HIGH (u8);
4708 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4709 vget_high_u16 (uint16x8_t __a)
4711 __GET_HIGH (u16);
4714 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4715 vget_high_u32 (uint32x4_t __a)
4717 __GET_HIGH (u32);
4720 #undef __GET_HIGH
4722 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4723 vget_high_u64 (uint64x2_t __a)
4725 return vcreate_u64 (vgetq_lane_u64 (__a, 1));
4728 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4729 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4731 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4734 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4735 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4737 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4740 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4741 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4743 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4746 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4747 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4749 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4752 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4753 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4755 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4758 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4759 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4761 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4762 (int8x8_t) __b);
4765 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4766 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4768 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4769 (int16x4_t) __b);
4772 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4773 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4775 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4776 (int32x2_t) __b);
4779 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4780 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4782 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4785 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4786 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4788 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4791 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4792 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4794 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4795 (int8x8_t) __b);
4798 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4799 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4801 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4802 (int16x4_t) __b);
/* Start of temporary inline asm implementations.  */
4807 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4808 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4810 int8x8_t result;
4811 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4812 : "=w"(result)
4813 : "0"(a), "w"(b), "w"(c)
4814 : /* No clobbers */);
4815 return result;
4818 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4819 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4821 int16x4_t result;
4822 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4823 : "=w"(result)
4824 : "0"(a), "w"(b), "w"(c)
4825 : /* No clobbers */);
4826 return result;
4829 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4830 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4832 int32x2_t result;
4833 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4834 : "=w"(result)
4835 : "0"(a), "w"(b), "w"(c)
4836 : /* No clobbers */);
4837 return result;
4840 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4841 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4843 uint8x8_t result;
4844 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4845 : "=w"(result)
4846 : "0"(a), "w"(b), "w"(c)
4847 : /* No clobbers */);
4848 return result;
4851 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4852 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4854 uint16x4_t result;
4855 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4856 : "=w"(result)
4857 : "0"(a), "w"(b), "w"(c)
4858 : /* No clobbers */);
4859 return result;
4862 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4863 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4865 uint32x2_t result;
4866 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4867 : "=w"(result)
4868 : "0"(a), "w"(b), "w"(c)
4869 : /* No clobbers */);
4870 return result;
4873 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4874 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4876 int16x8_t result;
4877 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4878 : "=w"(result)
4879 : "0"(a), "w"(b), "w"(c)
4880 : /* No clobbers */);
4881 return result;
4884 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4885 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4887 int32x4_t result;
4888 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4889 : "=w"(result)
4890 : "0"(a), "w"(b), "w"(c)
4891 : /* No clobbers */);
4892 return result;
4895 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4896 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4898 int64x2_t result;
4899 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4900 : "=w"(result)
4901 : "0"(a), "w"(b), "w"(c)
4902 : /* No clobbers */);
4903 return result;
4906 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4907 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4909 uint16x8_t result;
4910 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4911 : "=w"(result)
4912 : "0"(a), "w"(b), "w"(c)
4913 : /* No clobbers */);
4914 return result;
4917 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4918 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4920 uint32x4_t result;
4921 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4922 : "=w"(result)
4923 : "0"(a), "w"(b), "w"(c)
4924 : /* No clobbers */);
4925 return result;
4928 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4929 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4931 uint64x2_t result;
4932 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4933 : "=w"(result)
4934 : "0"(a), "w"(b), "w"(c)
4935 : /* No clobbers */);
4936 return result;
4939 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4940 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4942 int16x8_t result;
4943 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4944 : "=w"(result)
4945 : "0"(a), "w"(b), "w"(c)
4946 : /* No clobbers */);
4947 return result;
4950 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4951 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4953 int32x4_t result;
4954 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4955 : "=w"(result)
4956 : "0"(a), "w"(b), "w"(c)
4957 : /* No clobbers */);
4958 return result;
4961 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4962 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4964 int64x2_t result;
4965 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4966 : "=w"(result)
4967 : "0"(a), "w"(b), "w"(c)
4968 : /* No clobbers */);
4969 return result;
4972 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4973 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4975 uint16x8_t result;
4976 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4977 : "=w"(result)
4978 : "0"(a), "w"(b), "w"(c)
4979 : /* No clobbers */);
4980 return result;
4983 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4984 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4986 uint32x4_t result;
4987 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4988 : "=w"(result)
4989 : "0"(a), "w"(b), "w"(c)
4990 : /* No clobbers */);
4991 return result;
4994 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4995 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4997 uint64x2_t result;
4998 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4999 : "=w"(result)
5000 : "0"(a), "w"(b), "w"(c)
5001 : /* No clobbers */);
5002 return result;
5005 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5006 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
5008 int8x16_t result;
5009 __asm__ ("saba %0.16b,%2.16b,%3.16b"
5010 : "=w"(result)
5011 : "0"(a), "w"(b), "w"(c)
5012 : /* No clobbers */);
5013 return result;
5016 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5017 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
5019 int16x8_t result;
5020 __asm__ ("saba %0.8h,%2.8h,%3.8h"
5021 : "=w"(result)
5022 : "0"(a), "w"(b), "w"(c)
5023 : /* No clobbers */);
5024 return result;
5027 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5028 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
5030 int32x4_t result;
5031 __asm__ ("saba %0.4s,%2.4s,%3.4s"
5032 : "=w"(result)
5033 : "0"(a), "w"(b), "w"(c)
5034 : /* No clobbers */);
5035 return result;
5038 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5039 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5041 uint8x16_t result;
5042 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
5043 : "=w"(result)
5044 : "0"(a), "w"(b), "w"(c)
5045 : /* No clobbers */);
5046 return result;
5049 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5050 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5052 uint16x8_t result;
5053 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
5054 : "=w"(result)
5055 : "0"(a), "w"(b), "w"(c)
5056 : /* No clobbers */);
5057 return result;
5060 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5061 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5063 uint32x4_t result;
5064 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
5065 : "=w"(result)
5066 : "0"(a), "w"(b), "w"(c)
5067 : /* No clobbers */);
5068 return result;
5071 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5072 vabd_f32 (float32x2_t a, float32x2_t b)
5074 float32x2_t result;
5075 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
5076 : "=w"(result)
5077 : "w"(a), "w"(b)
5078 : /* No clobbers */);
5079 return result;
5082 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5083 vabd_s8 (int8x8_t a, int8x8_t b)
5085 int8x8_t result;
5086 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
5087 : "=w"(result)
5088 : "w"(a), "w"(b)
5089 : /* No clobbers */);
5090 return result;
5093 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5094 vabd_s16 (int16x4_t a, int16x4_t b)
5096 int16x4_t result;
5097 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
5098 : "=w"(result)
5099 : "w"(a), "w"(b)
5100 : /* No clobbers */);
5101 return result;
5104 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5105 vabd_s32 (int32x2_t a, int32x2_t b)
5107 int32x2_t result;
5108 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
5109 : "=w"(result)
5110 : "w"(a), "w"(b)
5111 : /* No clobbers */);
5112 return result;
5115 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5116 vabd_u8 (uint8x8_t a, uint8x8_t b)
5118 uint8x8_t result;
5119 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
5120 : "=w"(result)
5121 : "w"(a), "w"(b)
5122 : /* No clobbers */);
5123 return result;
5126 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5127 vabd_u16 (uint16x4_t a, uint16x4_t b)
5129 uint16x4_t result;
5130 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
5131 : "=w"(result)
5132 : "w"(a), "w"(b)
5133 : /* No clobbers */);
5134 return result;
5137 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5138 vabd_u32 (uint32x2_t a, uint32x2_t b)
5140 uint32x2_t result;
5141 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
5142 : "=w"(result)
5143 : "w"(a), "w"(b)
5144 : /* No clobbers */);
5145 return result;
5148 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
5149 vabdd_f64 (float64_t a, float64_t b)
5151 float64_t result;
5152 __asm__ ("fabd %d0, %d1, %d2"
5153 : "=w"(result)
5154 : "w"(a), "w"(b)
5155 : /* No clobbers */);
5156 return result;
5159 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5160 vabdl_high_s8 (int8x16_t a, int8x16_t b)
5162 int16x8_t result;
5163 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
5164 : "=w"(result)
5165 : "w"(a), "w"(b)
5166 : /* No clobbers */);
5167 return result;
5170 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5171 vabdl_high_s16 (int16x8_t a, int16x8_t b)
5173 int32x4_t result;
5174 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
5175 : "=w"(result)
5176 : "w"(a), "w"(b)
5177 : /* No clobbers */);
5178 return result;
5181 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5182 vabdl_high_s32 (int32x4_t a, int32x4_t b)
5184 int64x2_t result;
5185 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
5186 : "=w"(result)
5187 : "w"(a), "w"(b)
5188 : /* No clobbers */);
5189 return result;
5192 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5193 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
5195 uint16x8_t result;
5196 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
5197 : "=w"(result)
5198 : "w"(a), "w"(b)
5199 : /* No clobbers */);
5200 return result;
5203 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5204 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
5206 uint32x4_t result;
5207 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
5208 : "=w"(result)
5209 : "w"(a), "w"(b)
5210 : /* No clobbers */);
5211 return result;
5214 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5215 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
5217 uint64x2_t result;
5218 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
5219 : "=w"(result)
5220 : "w"(a), "w"(b)
5221 : /* No clobbers */);
5222 return result;
5225 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5226 vabdl_s8 (int8x8_t a, int8x8_t b)
5228 int16x8_t result;
5229 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
5230 : "=w"(result)
5231 : "w"(a), "w"(b)
5232 : /* No clobbers */);
5233 return result;
5236 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5237 vabdl_s16 (int16x4_t a, int16x4_t b)
5239 int32x4_t result;
5240 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
5241 : "=w"(result)
5242 : "w"(a), "w"(b)
5243 : /* No clobbers */);
5244 return result;
5247 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5248 vabdl_s32 (int32x2_t a, int32x2_t b)
5250 int64x2_t result;
5251 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
5252 : "=w"(result)
5253 : "w"(a), "w"(b)
5254 : /* No clobbers */);
5255 return result;
5258 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5259 vabdl_u8 (uint8x8_t a, uint8x8_t b)
5261 uint16x8_t result;
5262 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
5263 : "=w"(result)
5264 : "w"(a), "w"(b)
5265 : /* No clobbers */);
5266 return result;
5269 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5270 vabdl_u16 (uint16x4_t a, uint16x4_t b)
5272 uint32x4_t result;
5273 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
5274 : "=w"(result)
5275 : "w"(a), "w"(b)
5276 : /* No clobbers */);
5277 return result;
5280 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5281 vabdl_u32 (uint32x2_t a, uint32x2_t b)
5283 uint64x2_t result;
5284 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
5285 : "=w"(result)
5286 : "w"(a), "w"(b)
5287 : /* No clobbers */);
5288 return result;
5291 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5292 vabdq_f32 (float32x4_t a, float32x4_t b)
5294 float32x4_t result;
5295 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
5296 : "=w"(result)
5297 : "w"(a), "w"(b)
5298 : /* No clobbers */);
5299 return result;
5302 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5303 vabdq_f64 (float64x2_t a, float64x2_t b)
5305 float64x2_t result;
5306 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
5307 : "=w"(result)
5308 : "w"(a), "w"(b)
5309 : /* No clobbers */);
5310 return result;
5313 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5314 vabdq_s8 (int8x16_t a, int8x16_t b)
5316 int8x16_t result;
5317 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
5318 : "=w"(result)
5319 : "w"(a), "w"(b)
5320 : /* No clobbers */);
5321 return result;
5324 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5325 vabdq_s16 (int16x8_t a, int16x8_t b)
5327 int16x8_t result;
5328 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
5329 : "=w"(result)
5330 : "w"(a), "w"(b)
5331 : /* No clobbers */);
5332 return result;
5335 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5336 vabdq_s32 (int32x4_t a, int32x4_t b)
5338 int32x4_t result;
5339 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
5340 : "=w"(result)
5341 : "w"(a), "w"(b)
5342 : /* No clobbers */);
5343 return result;
5346 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5347 vabdq_u8 (uint8x16_t a, uint8x16_t b)
5349 uint8x16_t result;
5350 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
5351 : "=w"(result)
5352 : "w"(a), "w"(b)
5353 : /* No clobbers */);
5354 return result;
5357 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5358 vabdq_u16 (uint16x8_t a, uint16x8_t b)
5360 uint16x8_t result;
5361 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
5362 : "=w"(result)
5363 : "w"(a), "w"(b)
5364 : /* No clobbers */);
5365 return result;
5368 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5369 vabdq_u32 (uint32x4_t a, uint32x4_t b)
5371 uint32x4_t result;
5372 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
5373 : "=w"(result)
5374 : "w"(a), "w"(b)
5375 : /* No clobbers */);
5376 return result;
5379 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5380 vabds_f32 (float32_t a, float32_t b)
5382 float32_t result;
5383 __asm__ ("fabd %s0, %s1, %s2"
5384 : "=w"(result)
5385 : "w"(a), "w"(b)
5386 : /* No clobbers */);
5387 return result;
5390 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5391 vaddlv_s8 (int8x8_t a)
5393 int16_t result;
5394 __asm__ ("saddlv %h0,%1.8b"
5395 : "=w"(result)
5396 : "w"(a)
5397 : /* No clobbers */);
5398 return result;
5401 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5402 vaddlv_s16 (int16x4_t a)
5404 int32_t result;
5405 __asm__ ("saddlv %s0,%1.4h"
5406 : "=w"(result)
5407 : "w"(a)
5408 : /* No clobbers */);
5409 return result;
5412 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5413 vaddlv_u8 (uint8x8_t a)
5415 uint16_t result;
5416 __asm__ ("uaddlv %h0,%1.8b"
5417 : "=w"(result)
5418 : "w"(a)
5419 : /* No clobbers */);
5420 return result;
5423 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5424 vaddlv_u16 (uint16x4_t a)
5426 uint32_t result;
5427 __asm__ ("uaddlv %s0,%1.4h"
5428 : "=w"(result)
5429 : "w"(a)
5430 : /* No clobbers */);
5431 return result;
5434 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5435 vaddlvq_s8 (int8x16_t a)
5437 int16_t result;
5438 __asm__ ("saddlv %h0,%1.16b"
5439 : "=w"(result)
5440 : "w"(a)
5441 : /* No clobbers */);
5442 return result;
5445 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5446 vaddlvq_s16 (int16x8_t a)
5448 int32_t result;
5449 __asm__ ("saddlv %s0,%1.8h"
5450 : "=w"(result)
5451 : "w"(a)
5452 : /* No clobbers */);
5453 return result;
5456 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5457 vaddlvq_s32 (int32x4_t a)
5459 int64_t result;
5460 __asm__ ("saddlv %d0,%1.4s"
5461 : "=w"(result)
5462 : "w"(a)
5463 : /* No clobbers */);
5464 return result;
5467 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5468 vaddlvq_u8 (uint8x16_t a)
5470 uint16_t result;
5471 __asm__ ("uaddlv %h0,%1.16b"
5472 : "=w"(result)
5473 : "w"(a)
5474 : /* No clobbers */);
5475 return result;
5478 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5479 vaddlvq_u16 (uint16x8_t a)
5481 uint32_t result;
5482 __asm__ ("uaddlv %s0,%1.8h"
5483 : "=w"(result)
5484 : "w"(a)
5485 : /* No clobbers */);
5486 return result;
5489 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5490 vaddlvq_u32 (uint32x4_t a)
5492 uint64_t result;
5493 __asm__ ("uaddlv %d0,%1.4s"
5494 : "=w"(result)
5495 : "w"(a)
5496 : /* No clobbers */);
5497 return result;
/* vcopyq_lane_*: insert lane 'd' of vector 'c' into lane 'b' of vector
   'a' (INS).  These are statement-expression macros (not functions)
   because the lane numbers must be compile-time immediates for the
   "i" asm constraints.  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5664 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt_n_* (64-bit vector forms): fixed-point <-> float conversion
   with 'b' fraction bits (SCVTF/UCVTF/FCVTZS/FCVTZU #imm).  Macros,
   not functions, because 'b' must be a compile-time immediate for the
   "i" constraint.  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvtd_n_*: scalar double-precision fixed-point <-> float conversion
   with 'b' fraction bits.  */

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvtq_n_* (128-bit vector forms): fixed-point <-> float conversion
   with 'b' fraction bits.  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvts_n_*: scalar single-precision fixed-point <-> float conversion
   with 'b' fraction bits.  */

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5906 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5907 vcvtx_f32_f64 (float64x2_t a)
5909 float32x2_t result;
5910 __asm__ ("fcvtxn %0.2s,%1.2d"
5911 : "=w"(result)
5912 : "w"(a)
5913 : /* No clobbers */);
5914 return result;
5917 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5918 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5920 float32x4_t result;
5921 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5922 : "=w"(result)
5923 : "w" (b), "0"(a)
5924 : /* No clobbers */);
5925 return result;
5928 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5929 vcvtxd_f32_f64 (float64_t a)
5931 float32_t result;
5932 __asm__ ("fcvtxn %s0,%d1"
5933 : "=w"(result)
5934 : "w"(a)
5935 : /* No clobbers */);
5936 return result;
5939 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5940 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5942 float32x2_t result;
5943 float32x2_t t1;
5944 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
5945 : "=w"(result), "=w"(t1)
5946 : "0"(a), "w"(b), "w"(c)
5947 : /* No clobbers */);
5948 return result;
5951 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5952 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
5954 int16x4_t result;
5955 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5956 : "=w"(result)
5957 : "0"(a), "w"(b), "x"(c)
5958 : /* No clobbers */);
5959 return result;
5962 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5963 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
5965 int32x2_t result;
5966 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5967 : "=w"(result)
5968 : "0"(a), "w"(b), "w"(c)
5969 : /* No clobbers */);
5970 return result;
5973 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5974 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
5976 uint16x4_t result;
5977 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5978 : "=w"(result)
5979 : "0"(a), "w"(b), "x"(c)
5980 : /* No clobbers */);
5981 return result;
5984 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5985 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
5987 uint32x2_t result;
5988 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5989 : "=w"(result)
5990 : "0"(a), "w"(b), "w"(c)
5991 : /* No clobbers */);
5992 return result;
5995 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5996 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
5998 int8x8_t result;
5999 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6000 : "=w"(result)
6001 : "0"(a), "w"(b), "w"(c)
6002 : /* No clobbers */);
6003 return result;
6006 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6007 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6009 int16x4_t result;
6010 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6011 : "=w"(result)
6012 : "0"(a), "w"(b), "w"(c)
6013 : /* No clobbers */);
6014 return result;
6017 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6018 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6020 int32x2_t result;
6021 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6022 : "=w"(result)
6023 : "0"(a), "w"(b), "w"(c)
6024 : /* No clobbers */);
6025 return result;
6028 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6029 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6031 uint8x8_t result;
6032 __asm__ ("mla %0.8b, %2.8b, %3.8b"
6033 : "=w"(result)
6034 : "0"(a), "w"(b), "w"(c)
6035 : /* No clobbers */);
6036 return result;
6039 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6040 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6042 uint16x4_t result;
6043 __asm__ ("mla %0.4h, %2.4h, %3.4h"
6044 : "=w"(result)
6045 : "0"(a), "w"(b), "w"(c)
6046 : /* No clobbers */);
6047 return result;
6050 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6051 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6053 uint32x2_t result;
6054 __asm__ ("mla %0.2s, %2.2s, %3.2s"
6055 : "=w"(result)
6056 : "0"(a), "w"(b), "w"(c)
6057 : /* No clobbers */);
6058 return result;
/* vmlal_high_lane(q)_*: widening multiply-accumulate of the HIGH half
   of B by lane D of C (SMLAL2/UMLAL2); D must be a constant.  Macros
   because the lane number is an asm immediate.  Locals use reserved
   __-prefixed names so user macros cannot break this public header.
   "x" restricts 16-bit-element operands to V0-V15 as required by the
   .h[] indexed encodings.  */
#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c = (c);                                             \
       int16x8_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c = (c);                                             \
       int32x4_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c = (c);                                            \
       uint16x8_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c = (c);                                            \
       uint32x4_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

/* The laneq forms index into a full 128-bit C vector.  */
#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x8_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x4_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x8_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x4_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
6173 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6174 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6176 int32x4_t result;
6177 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6178 : "=w"(result)
6179 : "0"(a), "w"(b), "x"(c)
6180 : /* No clobbers */);
6181 return result;
6184 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6185 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6187 int64x2_t result;
6188 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6189 : "=w"(result)
6190 : "0"(a), "w"(b), "w"(c)
6191 : /* No clobbers */);
6192 return result;
6195 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6196 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6198 uint32x4_t result;
6199 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6200 : "=w"(result)
6201 : "0"(a), "w"(b), "x"(c)
6202 : /* No clobbers */);
6203 return result;
6206 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6207 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6209 uint64x2_t result;
6210 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6211 : "=w"(result)
6212 : "0"(a), "w"(b), "w"(c)
6213 : /* No clobbers */);
6214 return result;
6217 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6218 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6220 int16x8_t result;
6221 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6222 : "=w"(result)
6223 : "0"(a), "w"(b), "w"(c)
6224 : /* No clobbers */);
6225 return result;
6228 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6229 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6231 int32x4_t result;
6232 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6233 : "=w"(result)
6234 : "0"(a), "w"(b), "w"(c)
6235 : /* No clobbers */);
6236 return result;
6239 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6240 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6242 int64x2_t result;
6243 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6244 : "=w"(result)
6245 : "0"(a), "w"(b), "w"(c)
6246 : /* No clobbers */);
6247 return result;
6250 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6251 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6253 uint16x8_t result;
6254 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6255 : "=w"(result)
6256 : "0"(a), "w"(b), "w"(c)
6257 : /* No clobbers */);
6258 return result;
6261 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6262 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6264 uint32x4_t result;
6265 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6266 : "=w"(result)
6267 : "0"(a), "w"(b), "w"(c)
6268 : /* No clobbers */);
6269 return result;
6272 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6273 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6275 uint64x2_t result;
6276 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6277 : "=w"(result)
6278 : "0"(a), "w"(b), "w"(c)
6279 : /* No clobbers */);
6280 return result;
/* vmlal_lane(q)_*: widening multiply-accumulate of B by lane D of C
   (SMLAL/UMLAL by-element); D must be a constant, hence macros.
   Locals use reserved __-prefixed names (public-header hygiene).
   "x" restricts 16-bit-element operands to V0-V15.  */
#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c = (c);                                             \
       int16x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c = (c);                                             \
       int32x2_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c = (c);                                            \
       uint16x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c = (c);                                            \
       uint32x2_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

/* The laneq forms index into a full 128-bit C vector.  */
#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x2_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x2_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
6395 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6396 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
6398 int32x4_t result;
6399 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
6400 : "=w"(result)
6401 : "0"(a), "w"(b), "x"(c)
6402 : /* No clobbers */);
6403 return result;
6406 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6407 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
6409 int64x2_t result;
6410 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
6411 : "=w"(result)
6412 : "0"(a), "w"(b), "w"(c)
6413 : /* No clobbers */);
6414 return result;
6417 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6418 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
6420 uint32x4_t result;
6421 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
6422 : "=w"(result)
6423 : "0"(a), "w"(b), "x"(c)
6424 : /* No clobbers */);
6425 return result;
6428 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6429 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
6431 uint64x2_t result;
6432 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
6433 : "=w"(result)
6434 : "0"(a), "w"(b), "w"(c)
6435 : /* No clobbers */);
6436 return result;
6439 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6440 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
6442 int16x8_t result;
6443 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
6444 : "=w"(result)
6445 : "0"(a), "w"(b), "w"(c)
6446 : /* No clobbers */);
6447 return result;
6450 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6451 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
6453 int32x4_t result;
6454 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
6455 : "=w"(result)
6456 : "0"(a), "w"(b), "w"(c)
6457 : /* No clobbers */);
6458 return result;
6461 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6462 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
6464 int64x2_t result;
6465 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
6466 : "=w"(result)
6467 : "0"(a), "w"(b), "w"(c)
6468 : /* No clobbers */);
6469 return result;
6472 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6473 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
6475 uint16x8_t result;
6476 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
6477 : "=w"(result)
6478 : "0"(a), "w"(b), "w"(c)
6479 : /* No clobbers */);
6480 return result;
6483 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6484 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
6486 uint32x4_t result;
6487 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
6488 : "=w"(result)
6489 : "0"(a), "w"(b), "w"(c)
6490 : /* No clobbers */);
6491 return result;
6494 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6495 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
6497 uint64x2_t result;
6498 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
6499 : "=w"(result)
6500 : "0"(a), "w"(b), "w"(c)
6501 : /* No clobbers */);
6502 return result;
6505 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6506 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6508 float32x4_t result;
6509 float32x4_t t1;
6510 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
6511 : "=w"(result), "=w"(t1)
6512 : "0"(a), "w"(b), "w"(c)
6513 : /* No clobbers */);
6514 return result;
6517 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6518 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
6520 int16x8_t result;
6521 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6522 : "=w"(result)
6523 : "0"(a), "w"(b), "x"(c)
6524 : /* No clobbers */);
6525 return result;
6528 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6529 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
6531 int32x4_t result;
6532 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6533 : "=w"(result)
6534 : "0"(a), "w"(b), "w"(c)
6535 : /* No clobbers */);
6536 return result;
6539 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6540 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
6542 uint16x8_t result;
6543 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6544 : "=w"(result)
6545 : "0"(a), "w"(b), "x"(c)
6546 : /* No clobbers */);
6547 return result;
6550 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6551 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
6553 uint32x4_t result;
6554 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6555 : "=w"(result)
6556 : "0"(a), "w"(b), "w"(c)
6557 : /* No clobbers */);
6558 return result;
6561 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6562 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
6564 int8x16_t result;
6565 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6566 : "=w"(result)
6567 : "0"(a), "w"(b), "w"(c)
6568 : /* No clobbers */);
6569 return result;
6572 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6573 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
6575 int16x8_t result;
6576 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6577 : "=w"(result)
6578 : "0"(a), "w"(b), "w"(c)
6579 : /* No clobbers */);
6580 return result;
6583 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6584 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
6586 int32x4_t result;
6587 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6588 : "=w"(result)
6589 : "0"(a), "w"(b), "w"(c)
6590 : /* No clobbers */);
6591 return result;
6594 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6595 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
6597 uint8x16_t result;
6598 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6599 : "=w"(result)
6600 : "0"(a), "w"(b), "w"(c)
6601 : /* No clobbers */);
6602 return result;
6605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6606 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
6608 uint16x8_t result;
6609 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6610 : "=w"(result)
6611 : "0"(a), "w"(b), "w"(c)
6612 : /* No clobbers */);
6613 return result;
6616 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6617 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
6619 uint32x4_t result;
6620 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6621 : "=w"(result)
6622 : "0"(a), "w"(b), "w"(c)
6623 : /* No clobbers */);
6624 return result;
6627 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6628 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6630 float32x2_t result;
6631 float32x2_t t1;
6632 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
6633 : "=w"(result), "=w"(t1)
6634 : "0"(a), "w"(b), "w"(c)
6635 : /* No clobbers */);
6636 return result;
6639 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6640 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6642 int16x4_t result;
6643 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6644 : "=w"(result)
6645 : "0"(a), "w"(b), "x"(c)
6646 : /* No clobbers */);
6647 return result;
6650 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6651 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6653 int32x2_t result;
6654 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6655 : "=w"(result)
6656 : "0"(a), "w"(b), "w"(c)
6657 : /* No clobbers */);
6658 return result;
6661 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6662 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6664 uint16x4_t result;
6665 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6666 : "=w"(result)
6667 : "0"(a), "w"(b), "x"(c)
6668 : /* No clobbers */);
6669 return result;
6672 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6673 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6675 uint32x2_t result;
6676 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6677 : "=w"(result)
6678 : "0"(a), "w"(b), "w"(c)
6679 : /* No clobbers */);
6680 return result;
6683 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6684 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6686 int8x8_t result;
6687 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6688 : "=w"(result)
6689 : "0"(a), "w"(b), "w"(c)
6690 : /* No clobbers */);
6691 return result;
6694 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6695 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6697 int16x4_t result;
6698 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6699 : "=w"(result)
6700 : "0"(a), "w"(b), "w"(c)
6701 : /* No clobbers */);
6702 return result;
6705 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6706 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6708 int32x2_t result;
6709 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6710 : "=w"(result)
6711 : "0"(a), "w"(b), "w"(c)
6712 : /* No clobbers */);
6713 return result;
6716 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6717 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6719 uint8x8_t result;
6720 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6721 : "=w"(result)
6722 : "0"(a), "w"(b), "w"(c)
6723 : /* No clobbers */);
6724 return result;
6727 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6728 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6730 uint16x4_t result;
6731 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6732 : "=w"(result)
6733 : "0"(a), "w"(b), "w"(c)
6734 : /* No clobbers */);
6735 return result;
6738 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6739 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6741 uint32x2_t result;
6742 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6743 : "=w"(result)
6744 : "0"(a), "w"(b), "w"(c)
6745 : /* No clobbers */);
6746 return result;
/* vmlsl_high_lane(q)_*: widening multiply-subtract of the HIGH half
   of B by lane D of C (SMLSL2/UMLSL2); D must be a constant, hence
   macros.  Locals use reserved __-prefixed names (public-header
   hygiene).  "x" restricts 16-bit-element operands to V0-V15.  */
#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c = (c);                                             \
       int16x8_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c = (c);                                             \
       int32x4_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c = (c);                                            \
       uint16x8_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c = (c);                                            \
       uint32x4_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

/* The laneq forms index into a full 128-bit C vector.  */
#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x8_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x4_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x8_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x4_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
6861 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6862 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6864 int32x4_t result;
6865 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
6866 : "=w"(result)
6867 : "0"(a), "w"(b), "x"(c)
6868 : /* No clobbers */);
6869 return result;
6872 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6873 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6875 int64x2_t result;
6876 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
6877 : "=w"(result)
6878 : "0"(a), "w"(b), "w"(c)
6879 : /* No clobbers */);
6880 return result;
6883 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6884 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6886 uint32x4_t result;
6887 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
6888 : "=w"(result)
6889 : "0"(a), "w"(b), "x"(c)
6890 : /* No clobbers */);
6891 return result;
6894 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6895 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6897 uint64x2_t result;
6898 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
6899 : "=w"(result)
6900 : "0"(a), "w"(b), "w"(c)
6901 : /* No clobbers */);
6902 return result;
6905 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6906 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6908 int16x8_t result;
6909 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
6910 : "=w"(result)
6911 : "0"(a), "w"(b), "w"(c)
6912 : /* No clobbers */);
6913 return result;
6916 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6917 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6919 int32x4_t result;
6920 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
6921 : "=w"(result)
6922 : "0"(a), "w"(b), "w"(c)
6923 : /* No clobbers */);
6924 return result;
6927 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6928 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6930 int64x2_t result;
6931 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
6932 : "=w"(result)
6933 : "0"(a), "w"(b), "w"(c)
6934 : /* No clobbers */);
6935 return result;
6938 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6939 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6941 uint16x8_t result;
6942 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
6943 : "=w"(result)
6944 : "0"(a), "w"(b), "w"(c)
6945 : /* No clobbers */);
6946 return result;
6949 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6950 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6952 uint32x4_t result;
6953 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
6954 : "=w"(result)
6955 : "0"(a), "w"(b), "w"(c)
6956 : /* No clobbers */);
6957 return result;
6960 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6961 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6963 uint64x2_t result;
6964 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
6965 : "=w"(result)
6966 : "0"(a), "w"(b), "w"(c)
6967 : /* No clobbers */);
6968 return result;
/* vmlsl_lane(q)_*: widening multiply-subtract of B by lane D of C
   (SMLSL/UMLSL by-element); D must be a constant, hence macros.
   Locals use reserved __-prefixed names (public-header hygiene).
   "x" restricts 16-bit-element operands to V0-V15.  */
#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __c = (c);                                             \
       int16x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __c = (c);                                             \
       int32x2_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __c = (c);                                            \
       uint16x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __c = (c);                                            \
       uint32x2_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

/* The laneq forms index into a full 128-bit C vector.  */
#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x4_t __b = (b);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x2_t __b = (b);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x4_t __b = (b);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "x"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x2_t __b = (b);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(__result)                                        \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d)                  \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
7083 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7084 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7086 int32x4_t result;
7087 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7088 : "=w"(result)
7089 : "0"(a), "w"(b), "x"(c)
7090 : /* No clobbers */);
7091 return result;
7094 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7095 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7097 int64x2_t result;
7098 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7099 : "=w"(result)
7100 : "0"(a), "w"(b), "w"(c)
7101 : /* No clobbers */);
7102 return result;
7105 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7106 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7108 uint32x4_t result;
7109 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7110 : "=w"(result)
7111 : "0"(a), "w"(b), "x"(c)
7112 : /* No clobbers */);
7113 return result;
/* vmlsl family: widening multiply-subtract-long.  Each lane computes
   a[i] - (b[i] * c[i]) with the product widened to twice the element
   width (SMLSL for signed, UMLSL for unsigned).  The accumulator `a`
   is tied to the asm output via the "0" matching constraint, so the
   instruction updates it in place.  The _n_ form broadcasts a scalar
   multiplier from lane 0 (`%3.s[0]`).  */
7116 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7117 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7119 uint64x2_t result;
7120 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7121 : "=w"(result)
7122 : "0"(a), "w"(b), "w"(c)
7123 : /* No clobbers */);
7124 return result;
/* a - (b * c), int8x8 products widened to int16x8.  */
7127 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7128 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7130 int16x8_t result;
7131 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7132 : "=w"(result)
7133 : "0"(a), "w"(b), "w"(c)
7134 : /* No clobbers */);
7135 return result;
/* a - (b * c), int16x4 products widened to int32x4.  */
7138 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7139 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7141 int32x4_t result;
7142 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7143 : "=w"(result)
7144 : "0"(a), "w"(b), "w"(c)
7145 : /* No clobbers */);
7146 return result;
/* a - (b * c), int32x2 products widened to int64x2.  */
7149 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7150 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7152 int64x2_t result;
7153 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7154 : "=w"(result)
7155 : "0"(a), "w"(b), "w"(c)
7156 : /* No clobbers */);
7157 return result;
/* Unsigned variants of the above (UMLSL).  */
7160 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7161 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7163 uint16x8_t result;
7164 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7165 : "=w"(result)
7166 : "0"(a), "w"(b), "w"(c)
7167 : /* No clobbers */);
7168 return result;
7171 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7172 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7174 uint32x4_t result;
7175 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7176 : "=w"(result)
7177 : "0"(a), "w"(b), "w"(c)
7178 : /* No clobbers */);
7179 return result;
7182 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7183 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7185 uint64x2_t result;
7186 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7187 : "=w"(result)
7188 : "0"(a), "w"(b), "w"(c)
7189 : /* No clobbers */);
7190 return result;
/* vmlsq family: 128-bit multiply-subtract, a - (b * c), element-wise.
   Integer forms map to a single MLS; the float _n_ form has no
   single-instruction equivalent and is emitted as FMUL-by-element
   followed by FSUB through the scratch output `t1`.
   NOTE: the 16-bit *_n_* forms pass the broadcast scalar with the
   "x" constraint, which restricts it to V0-V15 as required by the
   .h[] by-element instruction encoding; 32-bit lanes may use any
   of V0-V31 ("w").  */
7193 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7194 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7196 float32x4_t result;
7197 float32x4_t t1;
7198 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7199 : "=w"(result), "=w"(t1)
7200 : "0"(a), "w"(b), "w"(c)
7201 : /* No clobbers */);
7202 return result;
/* a - b * c with 16-bit scalar c broadcast from lane 0.  */
7205 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7206 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7208 int16x8_t result;
7209 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7210 : "=w"(result)
7211 : "0"(a), "w"(b), "x"(c)
7212 : /* No clobbers */);
7213 return result;
7216 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7217 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7219 int32x4_t result;
7220 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7221 : "=w"(result)
7222 : "0"(a), "w"(b), "w"(c)
7223 : /* No clobbers */);
7224 return result;
7227 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7228 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7230 uint16x8_t result;
7231 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7232 : "=w"(result)
7233 : "0"(a), "w"(b), "x"(c)
7234 : /* No clobbers */);
7235 return result;
7238 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7239 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7241 uint32x4_t result;
7242 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7243 : "=w"(result)
7244 : "0"(a), "w"(b), "w"(c)
7245 : /* No clobbers */);
7246 return result;
/* Vector-by-vector forms: a - b * c, single MLS, accumulator tied
   via "0"(a).  MLS is sign-agnostic, so signed and unsigned variants
   share the same template.  */
7249 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7250 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7252 int8x16_t result;
7253 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7254 : "=w"(result)
7255 : "0"(a), "w"(b), "w"(c)
7256 : /* No clobbers */);
7257 return result;
7260 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7261 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7263 int16x8_t result;
7264 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7265 : "=w"(result)
7266 : "0"(a), "w"(b), "w"(c)
7267 : /* No clobbers */);
7268 return result;
7271 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7272 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7274 int32x4_t result;
7275 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7276 : "=w"(result)
7277 : "0"(a), "w"(b), "w"(c)
7278 : /* No clobbers */);
7279 return result;
7282 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7283 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7285 uint8x16_t result;
7286 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7287 : "=w"(result)
7288 : "0"(a), "w"(b), "w"(c)
7289 : /* No clobbers */);
7290 return result;
7293 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7294 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7296 uint16x8_t result;
7297 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7298 : "=w"(result)
7299 : "0"(a), "w"(b), "w"(c)
7300 : /* No clobbers */);
7301 return result;
7304 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7305 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7307 uint32x4_t result;
7308 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7309 : "=w"(result)
7310 : "0"(a), "w"(b), "w"(c)
7311 : /* No clobbers */);
7312 return result;
/* vmovl_high family: widen the HIGH half of a 128-bit vector to a
   full 128-bit vector of double-width elements.  Implemented as a
   shift-left-long by #0: SSHLL2 sign-extends, USHLL2 zero-extends.  */
7315 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7316 vmovl_high_s8 (int8x16_t a)
7318 int16x8_t result;
7319 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7320 : "=w"(result)
7321 : "w"(a)
7322 : /* No clobbers */);
7323 return result;
7326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7327 vmovl_high_s16 (int16x8_t a)
7329 int32x4_t result;
7330 __asm__ ("sshll2 %0.4s,%1.8h,#0"
7331 : "=w"(result)
7332 : "w"(a)
7333 : /* No clobbers */);
7334 return result;
7337 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7338 vmovl_high_s32 (int32x4_t a)
7340 int64x2_t result;
7341 __asm__ ("sshll2 %0.2d,%1.4s,#0"
7342 : "=w"(result)
7343 : "w"(a)
7344 : /* No clobbers */);
7345 return result;
7348 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7349 vmovl_high_u8 (uint8x16_t a)
7351 uint16x8_t result;
7352 __asm__ ("ushll2 %0.8h,%1.16b,#0"
7353 : "=w"(result)
7354 : "w"(a)
7355 : /* No clobbers */);
7356 return result;
7359 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7360 vmovl_high_u16 (uint16x8_t a)
7362 uint32x4_t result;
7363 __asm__ ("ushll2 %0.4s,%1.8h,#0"
7364 : "=w"(result)
7365 : "w"(a)
7366 : /* No clobbers */);
7367 return result;
7370 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7371 vmovl_high_u32 (uint32x4_t a)
7373 uint64x2_t result;
7374 __asm__ ("ushll2 %0.2d,%1.4s,#0"
7375 : "=w"(result)
7376 : "w"(a)
7377 : /* No clobbers */);
7378 return result;
/* vmovl family: widen a 64-bit vector to a 128-bit vector of
   double-width elements (SSHLL/USHLL by #0, sign/zero extension).  */
7381 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7382 vmovl_s8 (int8x8_t a)
7384 int16x8_t result;
7385 __asm__ ("sshll %0.8h,%1.8b,#0"
7386 : "=w"(result)
7387 : "w"(a)
7388 : /* No clobbers */);
7389 return result;
7392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7393 vmovl_s16 (int16x4_t a)
7395 int32x4_t result;
7396 __asm__ ("sshll %0.4s,%1.4h,#0"
7397 : "=w"(result)
7398 : "w"(a)
7399 : /* No clobbers */);
7400 return result;
7403 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7404 vmovl_s32 (int32x2_t a)
7406 int64x2_t result;
7407 __asm__ ("sshll %0.2d,%1.2s,#0"
7408 : "=w"(result)
7409 : "w"(a)
7410 : /* No clobbers */);
7411 return result;
7414 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7415 vmovl_u8 (uint8x8_t a)
7417 uint16x8_t result;
7418 __asm__ ("ushll %0.8h,%1.8b,#0"
7419 : "=w"(result)
7420 : "w"(a)
7421 : /* No clobbers */);
7422 return result;
7425 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7426 vmovl_u16 (uint16x4_t a)
7428 uint32x4_t result;
7429 __asm__ ("ushll %0.4s,%1.4h,#0"
7430 : "=w"(result)
7431 : "w"(a)
7432 : /* No clobbers */);
7433 return result;
7436 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7437 vmovl_u32 (uint32x2_t a)
7439 uint64x2_t result;
7440 __asm__ ("ushll %0.2d,%1.2s,#0"
7441 : "=w"(result)
7442 : "w"(a)
7443 : /* No clobbers */);
7444 return result;
/* vmovn_high family: narrow `b` and place it in the high half of the
   result, keeping `a` as the low half.  The initial value is built
   with vcombine (low half = a, high half zeroed via vcreate(0)), and
   XTN2 then overwrites the high half; the "+w" read-write constraint
   tells the compiler the whole register is both consumed and
   produced.  */
7447 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7448 vmovn_high_s16 (int8x8_t a, int16x8_t b)
7450 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
7451 __asm__ ("xtn2 %0.16b,%1.8h"
7452 : "+w"(result)
7453 : "w"(b)
7454 : /* No clobbers */);
7455 return result;
7458 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7459 vmovn_high_s32 (int16x4_t a, int32x4_t b)
7461 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
7462 __asm__ ("xtn2 %0.8h,%1.4s"
7463 : "+w"(result)
7464 : "w"(b)
7465 : /* No clobbers */);
7466 return result;
7469 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7470 vmovn_high_s64 (int32x2_t a, int64x2_t b)
7472 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
7473 __asm__ ("xtn2 %0.4s,%1.2d"
7474 : "+w"(result)
7475 : "w"(b)
7476 : /* No clobbers */);
7477 return result;
7480 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7481 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
7483 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
7484 __asm__ ("xtn2 %0.16b,%1.8h"
7485 : "+w"(result)
7486 : "w"(b)
7487 : /* No clobbers */);
7488 return result;
7491 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7492 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
7494 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
7495 __asm__ ("xtn2 %0.8h,%1.4s"
7496 : "+w"(result)
7497 : "w"(b)
7498 : /* No clobbers */);
7499 return result;
7502 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7503 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
7505 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
7506 __asm__ ("xtn2 %0.4s,%1.2d"
7507 : "+w"(result)
7508 : "w"(b)
7509 : /* No clobbers */);
7510 return result;
/* vmovn family: narrow each element to half width, keeping the low
   bits (XTN — extract narrow; truncation, not saturation).  */
7513 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7514 vmovn_s16 (int16x8_t a)
7516 int8x8_t result;
7517 __asm__ ("xtn %0.8b,%1.8h"
7518 : "=w"(result)
7519 : "w"(a)
7520 : /* No clobbers */);
7521 return result;
7524 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7525 vmovn_s32 (int32x4_t a)
7527 int16x4_t result;
7528 __asm__ ("xtn %0.4h,%1.4s"
7529 : "=w"(result)
7530 : "w"(a)
7531 : /* No clobbers */);
7532 return result;
7535 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7536 vmovn_s64 (int64x2_t a)
7538 int32x2_t result;
7539 __asm__ ("xtn %0.2s,%1.2d"
7540 : "=w"(result)
7541 : "w"(a)
7542 : /* No clobbers */);
7543 return result;
7546 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7547 vmovn_u16 (uint16x8_t a)
7549 uint8x8_t result;
7550 __asm__ ("xtn %0.8b,%1.8h"
7551 : "=w"(result)
7552 : "w"(a)
7553 : /* No clobbers */);
7554 return result;
7557 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7558 vmovn_u32 (uint32x4_t a)
7560 uint16x4_t result;
7561 __asm__ ("xtn %0.4h,%1.4s"
7562 : "=w"(result)
7563 : "w"(a)
7564 : /* No clobbers */);
7565 return result;
7568 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7569 vmovn_u64 (uint64x2_t a)
7571 uint32x2_t result;
7572 __asm__ ("xtn %0.2s,%1.2d"
7573 : "=w"(result)
7574 : "w"(a)
7575 : /* No clobbers */);
7576 return result;
/* vmul_n family: multiply each element of `a` by scalar `b`,
   implemented as a multiply by element 0 (`%2.{s,h}[0]`).  The
   16-bit forms use the "x" constraint (V0-V15) because the .h[]
   by-element encoding cannot address the upper vector registers.  */
7579 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7580 vmul_n_f32 (float32x2_t a, float32_t b)
7582 float32x2_t result;
7583 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
7584 : "=w"(result)
7585 : "w"(a), "w"(b)
7586 : /* No clobbers */);
7587 return result;
7590 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7591 vmul_n_s16 (int16x4_t a, int16_t b)
7593 int16x4_t result;
7594 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
7595 : "=w"(result)
7596 : "w"(a), "x"(b)
7597 : /* No clobbers */);
7598 return result;
7601 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7602 vmul_n_s32 (int32x2_t a, int32_t b)
7604 int32x2_t result;
7605 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
7606 : "=w"(result)
7607 : "w"(a), "w"(b)
7608 : /* No clobbers */);
7609 return result;
7612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7613 vmul_n_u16 (uint16x4_t a, uint16_t b)
7615 uint16x4_t result;
7616 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
7617 : "=w"(result)
7618 : "w"(a), "x"(b)
7619 : /* No clobbers */);
7620 return result;
7623 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7624 vmul_n_u32 (uint32x2_t a, uint32_t b)
7626 uint32x2_t result;
7627 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
7628 : "=w"(result)
7629 : "w"(a), "w"(b)
7630 : /* No clobbers */);
7631 return result;
/* vmull_high_lane / vmull_high_laneq: widening multiply of the HIGH
   half of `a` by a selected lane of `b` (SMULL2/UMULL2 by element).
   These are statement-expression macros because the lane number `c`
   must reach the asm as an immediate ("i" constraint).  _lane_ takes
   a 64-bit `b`, _laneq_ a 128-bit `b`.
   NOTE(review): the lane index is not range-checked in this asm
   fallback — an out-of-range `c` surfaces as an assembler error.  */
7634 #define vmull_high_lane_s16(a, b, c) \
7635 __extension__ \
7636 ({ \
7637 int16x4_t b_ = (b); \
7638 int16x8_t a_ = (a); \
7639 int32x4_t result; \
7640 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
7641 : "=w"(result) \
7642 : "w"(a_), "x"(b_), "i"(c) \
7643 : /* No clobbers */); \
7644 result; \
7647 #define vmull_high_lane_s32(a, b, c) \
7648 __extension__ \
7649 ({ \
7650 int32x2_t b_ = (b); \
7651 int32x4_t a_ = (a); \
7652 int64x2_t result; \
7653 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
7654 : "=w"(result) \
7655 : "w"(a_), "w"(b_), "i"(c) \
7656 : /* No clobbers */); \
7657 result; \
7660 #define vmull_high_lane_u16(a, b, c) \
7661 __extension__ \
7662 ({ \
7663 uint16x4_t b_ = (b); \
7664 uint16x8_t a_ = (a); \
7665 uint32x4_t result; \
7666 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
7667 : "=w"(result) \
7668 : "w"(a_), "x"(b_), "i"(c) \
7669 : /* No clobbers */); \
7670 result; \
7673 #define vmull_high_lane_u32(a, b, c) \
7674 __extension__ \
7675 ({ \
7676 uint32x2_t b_ = (b); \
7677 uint32x4_t a_ = (a); \
7678 uint64x2_t result; \
7679 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
7680 : "=w"(result) \
7681 : "w"(a_), "w"(b_), "i"(c) \
7682 : /* No clobbers */); \
7683 result; \
7686 #define vmull_high_laneq_s16(a, b, c) \
7687 __extension__ \
7688 ({ \
7689 int16x8_t b_ = (b); \
7690 int16x8_t a_ = (a); \
7691 int32x4_t result; \
7692 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
7693 : "=w"(result) \
7694 : "w"(a_), "x"(b_), "i"(c) \
7695 : /* No clobbers */); \
7696 result; \
7699 #define vmull_high_laneq_s32(a, b, c) \
7700 __extension__ \
7701 ({ \
7702 int32x4_t b_ = (b); \
7703 int32x4_t a_ = (a); \
7704 int64x2_t result; \
7705 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
7706 : "=w"(result) \
7707 : "w"(a_), "w"(b_), "i"(c) \
7708 : /* No clobbers */); \
7709 result; \
7712 #define vmull_high_laneq_u16(a, b, c) \
7713 __extension__ \
7714 ({ \
7715 uint16x8_t b_ = (b); \
7716 uint16x8_t a_ = (a); \
7717 uint32x4_t result; \
7718 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
7719 : "=w"(result) \
7720 : "w"(a_), "x"(b_), "i"(c) \
7721 : /* No clobbers */); \
7722 result; \
7725 #define vmull_high_laneq_u32(a, b, c) \
7726 __extension__ \
7727 ({ \
7728 uint32x4_t b_ = (b); \
7729 uint32x4_t a_ = (a); \
7730 uint64x2_t result; \
7731 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
7732 : "=w"(result) \
7733 : "w"(a_), "w"(b_), "i"(c) \
7734 : /* No clobbers */); \
7735 result; \
/* vmull_high_n: widening multiply of the high half of `a` by a
   broadcast scalar (lane 0 of `b`).  */
7738 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7739 vmull_high_n_s16 (int16x8_t a, int16_t b)
7741 int32x4_t result;
7742 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
7743 : "=w"(result)
7744 : "w"(a), "x"(b)
7745 : /* No clobbers */);
7746 return result;
7749 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7750 vmull_high_n_s32 (int32x4_t a, int32_t b)
7752 int64x2_t result;
7753 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
7754 : "=w"(result)
7755 : "w"(a), "w"(b)
7756 : /* No clobbers */);
7757 return result;
7760 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7761 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
7763 uint32x4_t result;
7764 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
7765 : "=w"(result)
7766 : "w"(a), "x"(b)
7767 : /* No clobbers */);
7768 return result;
7771 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7772 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
7774 uint64x2_t result;
7775 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
7776 : "=w"(result)
7777 : "w"(a), "w"(b)
7778 : /* No clobbers */);
7779 return result;
/* vmull_high: widening multiply of the high halves of two 128-bit
   vectors (PMULL2 polynomial, SMULL2 signed, UMULL2 unsigned).  */
7782 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7783 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
7785 poly16x8_t result;
7786 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
7787 : "=w"(result)
7788 : "w"(a), "w"(b)
7789 : /* No clobbers */);
7790 return result;
7793 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7794 vmull_high_s8 (int8x16_t a, int8x16_t b)
7796 int16x8_t result;
7797 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
7798 : "=w"(result)
7799 : "w"(a), "w"(b)
7800 : /* No clobbers */);
7801 return result;
7804 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7805 vmull_high_s16 (int16x8_t a, int16x8_t b)
7807 int32x4_t result;
7808 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
7809 : "=w"(result)
7810 : "w"(a), "w"(b)
7811 : /* No clobbers */);
7812 return result;
7815 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7816 vmull_high_s32 (int32x4_t a, int32x4_t b)
7818 int64x2_t result;
7819 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
7820 : "=w"(result)
7821 : "w"(a), "w"(b)
7822 : /* No clobbers */);
7823 return result;
7826 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7827 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
7829 uint16x8_t result;
7830 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
7831 : "=w"(result)
7832 : "w"(a), "w"(b)
7833 : /* No clobbers */);
7834 return result;
7837 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7838 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
7840 uint32x4_t result;
7841 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
7842 : "=w"(result)
7843 : "w"(a), "w"(b)
7844 : /* No clobbers */);
7845 return result;
7848 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7849 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
7851 uint64x2_t result;
7852 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
7853 : "=w"(result)
7854 : "w"(a), "w"(b)
7855 : /* No clobbers */);
7856 return result;
/* vmull_lane / vmull_laneq: widening multiply of a 64-bit vector by
   a selected lane of `b` (SMULL/UMULL by element).  Macros so the
   lane index can be passed as an immediate ("i"); 16-bit `b` uses
   the "x" constraint (V0-V15) required by the .h[] encoding.
   NOTE(review): lane bounds are not checked in this asm fallback.  */
7859 #define vmull_lane_s16(a, b, c) \
7860 __extension__ \
7861 ({ \
7862 int16x4_t b_ = (b); \
7863 int16x4_t a_ = (a); \
7864 int32x4_t result; \
7865 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
7866 : "=w"(result) \
7867 : "w"(a_), "x"(b_), "i"(c) \
7868 : /* No clobbers */); \
7869 result; \
7872 #define vmull_lane_s32(a, b, c) \
7873 __extension__ \
7874 ({ \
7875 int32x2_t b_ = (b); \
7876 int32x2_t a_ = (a); \
7877 int64x2_t result; \
7878 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
7879 : "=w"(result) \
7880 : "w"(a_), "w"(b_), "i"(c) \
7881 : /* No clobbers */); \
7882 result; \
7885 #define vmull_lane_u16(a, b, c) \
7886 __extension__ \
7887 ({ \
7888 uint16x4_t b_ = (b); \
7889 uint16x4_t a_ = (a); \
7890 uint32x4_t result; \
7891 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
7892 : "=w"(result) \
7893 : "w"(a_), "x"(b_), "i"(c) \
7894 : /* No clobbers */); \
7895 result; \
7898 #define vmull_lane_u32(a, b, c) \
7899 __extension__ \
7900 ({ \
7901 uint32x2_t b_ = (b); \
7902 uint32x2_t a_ = (a); \
7903 uint64x2_t result; \
7904 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
7905 : "=w"(result) \
7906 : "w"(a_), "w"(b_), "i"(c) \
7907 : /* No clobbers */); \
7908 result; \
7911 #define vmull_laneq_s16(a, b, c) \
7912 __extension__ \
7913 ({ \
7914 int16x8_t b_ = (b); \
7915 int16x4_t a_ = (a); \
7916 int32x4_t result; \
7917 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
7918 : "=w"(result) \
7919 : "w"(a_), "x"(b_), "i"(c) \
7920 : /* No clobbers */); \
7921 result; \
7924 #define vmull_laneq_s32(a, b, c) \
7925 __extension__ \
7926 ({ \
7927 int32x4_t b_ = (b); \
7928 int32x2_t a_ = (a); \
7929 int64x2_t result; \
7930 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
7931 : "=w"(result) \
7932 : "w"(a_), "w"(b_), "i"(c) \
7933 : /* No clobbers */); \
7934 result; \
7937 #define vmull_laneq_u16(a, b, c) \
7938 __extension__ \
7939 ({ \
7940 uint16x8_t b_ = (b); \
7941 uint16x4_t a_ = (a); \
7942 uint32x4_t result; \
7943 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
7944 : "=w"(result) \
7945 : "w"(a_), "x"(b_), "i"(c) \
7946 : /* No clobbers */); \
7947 result; \
7950 #define vmull_laneq_u32(a, b, c) \
7951 __extension__ \
7952 ({ \
7953 uint32x4_t b_ = (b); \
7954 uint32x2_t a_ = (a); \
7955 uint64x2_t result; \
7956 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
7957 : "=w"(result) \
7958 : "w"(a_), "w"(b_), "i"(c) \
7959 : /* No clobbers */); \
7960 result; \
/* vmull_n: widening multiply by a broadcast scalar (lane 0).  */
7963 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7964 vmull_n_s16 (int16x4_t a, int16_t b)
7966 int32x4_t result;
7967 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
7968 : "=w"(result)
7969 : "w"(a), "x"(b)
7970 : /* No clobbers */);
7971 return result;
7974 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7975 vmull_n_s32 (int32x2_t a, int32_t b)
7977 int64x2_t result;
7978 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
7979 : "=w"(result)
7980 : "w"(a), "w"(b)
7981 : /* No clobbers */);
7982 return result;
7985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7986 vmull_n_u16 (uint16x4_t a, uint16_t b)
7988 uint32x4_t result;
7989 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
7990 : "=w"(result)
7991 : "w"(a), "x"(b)
7992 : /* No clobbers */);
7993 return result;
7996 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7997 vmull_n_u32 (uint32x2_t a, uint32_t b)
7999 uint64x2_t result;
8000 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8001 : "=w"(result)
8002 : "w"(a), "w"(b)
8003 : /* No clobbers */);
8004 return result;
/* vmull: vector-by-vector widening multiply (PMULL polynomial,
   SMULL signed, UMULL unsigned); results are double-width.  */
8007 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8008 vmull_p8 (poly8x8_t a, poly8x8_t b)
8010 poly16x8_t result;
8011 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8012 : "=w"(result)
8013 : "w"(a), "w"(b)
8014 : /* No clobbers */);
8015 return result;
8018 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8019 vmull_s8 (int8x8_t a, int8x8_t b)
8021 int16x8_t result;
8022 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8023 : "=w"(result)
8024 : "w"(a), "w"(b)
8025 : /* No clobbers */);
8026 return result;
8029 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8030 vmull_s16 (int16x4_t a, int16x4_t b)
8032 int32x4_t result;
8033 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8034 : "=w"(result)
8035 : "w"(a), "w"(b)
8036 : /* No clobbers */);
8037 return result;
8040 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8041 vmull_s32 (int32x2_t a, int32x2_t b)
8043 int64x2_t result;
8044 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8045 : "=w"(result)
8046 : "w"(a), "w"(b)
8047 : /* No clobbers */);
8048 return result;
8051 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8052 vmull_u8 (uint8x8_t a, uint8x8_t b)
8054 uint16x8_t result;
8055 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8056 : "=w"(result)
8057 : "w"(a), "w"(b)
8058 : /* No clobbers */);
8059 return result;
8062 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8063 vmull_u16 (uint16x4_t a, uint16x4_t b)
8065 uint32x4_t result;
8066 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8067 : "=w"(result)
8068 : "w"(a), "w"(b)
8069 : /* No clobbers */);
8070 return result;
8073 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8074 vmull_u32 (uint32x2_t a, uint32x2_t b)
8076 uint64x2_t result;
8077 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8078 : "=w"(result)
8079 : "w"(a), "w"(b)
8080 : /* No clobbers */);
8081 return result;
/* vmulq_n family: 128-bit multiply by broadcast scalar (MUL/FMUL by
   element 0).  16-bit forms again need "x" (V0-V15) for the scalar
   because of the .h[] by-element encoding.  */
8084 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8085 vmulq_n_f32 (float32x4_t a, float32_t b)
8087 float32x4_t result;
8088 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8089 : "=w"(result)
8090 : "w"(a), "w"(b)
8091 : /* No clobbers */);
8092 return result;
8095 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8096 vmulq_n_f64 (float64x2_t a, float64_t b)
8098 float64x2_t result;
8099 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8100 : "=w"(result)
8101 : "w"(a), "w"(b)
8102 : /* No clobbers */);
8103 return result;
8106 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8107 vmulq_n_s16 (int16x8_t a, int16_t b)
8109 int16x8_t result;
8110 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8111 : "=w"(result)
8112 : "w"(a), "x"(b)
8113 : /* No clobbers */);
8114 return result;
8117 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8118 vmulq_n_s32 (int32x4_t a, int32_t b)
8120 int32x4_t result;
8121 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8122 : "=w"(result)
8123 : "w"(a), "w"(b)
8124 : /* No clobbers */);
8125 return result;
8128 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8129 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8131 uint16x8_t result;
8132 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8133 : "=w"(result)
8134 : "w"(a), "x"(b)
8135 : /* No clobbers */);
8136 return result;
8139 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8140 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8142 uint32x4_t result;
8143 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8144 : "=w"(result)
8145 : "w"(a), "w"(b)
8146 : /* No clobbers */);
8147 return result;
/* vmulx family: floating-point multiply-extended (FMULX).  Unlike
   FMUL, FMULX is defined by the architecture to return 2.0 (with the
   sign of the product) for 0 x infinity — see the ARM ARM for the
   exact semantics.  Lane variants are macros so the lane index `c`
   reaches the asm as an immediate; `d`/`s` operand prefixes select
   scalar double/single registers.  */
8150 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8151 vmulx_f32 (float32x2_t a, float32x2_t b)
8153 float32x2_t result;
8154 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8155 : "=w"(result)
8156 : "w"(a), "w"(b)
8157 : /* No clobbers */);
8158 return result;
/* Note: `b` here is the 128-bit float32x4_t lane source.  */
8161 #define vmulx_lane_f32(a, b, c) \
8162 __extension__ \
8163 ({ \
8164 float32x4_t b_ = (b); \
8165 float32x2_t a_ = (a); \
8166 float32x2_t result; \
8167 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8168 : "=w"(result) \
8169 : "w"(a_), "w"(b_), "i"(c) \
8170 : /* No clobbers */); \
8171 result; \
8174 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8175 vmulxd_f64 (float64_t a, float64_t b)
8177 float64_t result;
8178 __asm__ ("fmulx %d0, %d1, %d2"
8179 : "=w"(result)
8180 : "w"(a), "w"(b)
8181 : /* No clobbers */);
8182 return result;
8185 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8186 vmulxq_f32 (float32x4_t a, float32x4_t b)
8188 float32x4_t result;
8189 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8190 : "=w"(result)
8191 : "w"(a), "w"(b)
8192 : /* No clobbers */);
8193 return result;
8196 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8197 vmulxq_f64 (float64x2_t a, float64x2_t b)
8199 float64x2_t result;
8200 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8201 : "=w"(result)
8202 : "w"(a), "w"(b)
8203 : /* No clobbers */);
8204 return result;
8207 #define vmulxq_lane_f32(a, b, c) \
8208 __extension__ \
8209 ({ \
8210 float32x4_t b_ = (b); \
8211 float32x4_t a_ = (a); \
8212 float32x4_t result; \
8213 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8214 : "=w"(result) \
8215 : "w"(a_), "w"(b_), "i"(c) \
8216 : /* No clobbers */); \
8217 result; \
8220 #define vmulxq_lane_f64(a, b, c) \
8221 __extension__ \
8222 ({ \
8223 float64x2_t b_ = (b); \
8224 float64x2_t a_ = (a); \
8225 float64x2_t result; \
8226 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
8227 : "=w"(result) \
8228 : "w"(a_), "w"(b_), "i"(c) \
8229 : /* No clobbers */); \
8230 result; \
8233 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8234 vmulxs_f32 (float32_t a, float32_t b)
8236 float32_t result;
8237 __asm__ ("fmulx %s0, %s1, %s2"
8238 : "=w"(result)
8239 : "w"(a), "w"(b)
8240 : /* No clobbers */);
8241 return result;
/* vmvn / vmvnq family: bitwise NOT of every element (MVN).  Because
   the operation is purely bitwise, every element width shares the
   byte arrangement: .8b for 64-bit vectors, .16b for 128-bit.  */
8244 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8245 vmvn_p8 (poly8x8_t a)
8247 poly8x8_t result;
8248 __asm__ ("mvn %0.8b,%1.8b"
8249 : "=w"(result)
8250 : "w"(a)
8251 : /* No clobbers */);
8252 return result;
8255 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8256 vmvn_s8 (int8x8_t a)
8258 int8x8_t result;
8259 __asm__ ("mvn %0.8b,%1.8b"
8260 : "=w"(result)
8261 : "w"(a)
8262 : /* No clobbers */);
8263 return result;
8266 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8267 vmvn_s16 (int16x4_t a)
8269 int16x4_t result;
8270 __asm__ ("mvn %0.8b,%1.8b"
8271 : "=w"(result)
8272 : "w"(a)
8273 : /* No clobbers */);
8274 return result;
8277 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8278 vmvn_s32 (int32x2_t a)
8280 int32x2_t result;
8281 __asm__ ("mvn %0.8b,%1.8b"
8282 : "=w"(result)
8283 : "w"(a)
8284 : /* No clobbers */);
8285 return result;
8288 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8289 vmvn_u8 (uint8x8_t a)
8291 uint8x8_t result;
8292 __asm__ ("mvn %0.8b,%1.8b"
8293 : "=w"(result)
8294 : "w"(a)
8295 : /* No clobbers */);
8296 return result;
8299 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8300 vmvn_u16 (uint16x4_t a)
8302 uint16x4_t result;
8303 __asm__ ("mvn %0.8b,%1.8b"
8304 : "=w"(result)
8305 : "w"(a)
8306 : /* No clobbers */);
8307 return result;
8310 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8311 vmvn_u32 (uint32x2_t a)
8313 uint32x2_t result;
8314 __asm__ ("mvn %0.8b,%1.8b"
8315 : "=w"(result)
8316 : "w"(a)
8317 : /* No clobbers */);
8318 return result;
/* 128-bit variants.  */
8321 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8322 vmvnq_p8 (poly8x16_t a)
8324 poly8x16_t result;
8325 __asm__ ("mvn %0.16b,%1.16b"
8326 : "=w"(result)
8327 : "w"(a)
8328 : /* No clobbers */);
8329 return result;
8332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8333 vmvnq_s8 (int8x16_t a)
8335 int8x16_t result;
8336 __asm__ ("mvn %0.16b,%1.16b"
8337 : "=w"(result)
8338 : "w"(a)
8339 : /* No clobbers */);
8340 return result;
8343 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8344 vmvnq_s16 (int16x8_t a)
8346 int16x8_t result;
8347 __asm__ ("mvn %0.16b,%1.16b"
8348 : "=w"(result)
8349 : "w"(a)
8350 : /* No clobbers */);
8351 return result;
8354 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8355 vmvnq_s32 (int32x4_t a)
8357 int32x4_t result;
8358 __asm__ ("mvn %0.16b,%1.16b"
8359 : "=w"(result)
8360 : "w"(a)
8361 : /* No clobbers */);
8362 return result;
8365 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8366 vmvnq_u8 (uint8x16_t a)
8368 uint8x16_t result;
8369 __asm__ ("mvn %0.16b,%1.16b"
8370 : "=w"(result)
8371 : "w"(a)
8372 : /* No clobbers */);
8373 return result;
8376 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8377 vmvnq_u16 (uint16x8_t a)
8379 uint16x8_t result;
8380 __asm__ ("mvn %0.16b,%1.16b"
8381 : "=w"(result)
8382 : "w"(a)
8383 : /* No clobbers */);
8384 return result;
8387 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8388 vmvnq_u32 (uint32x4_t a)
8390 uint32x4_t result;
8391 __asm__ ("mvn %0.16b,%1.16b"
8392 : "=w"(result)
8393 : "w"(a)
8394 : /* No clobbers */);
8395 return result;
/* vpadal / vpadalq family: pairwise add and accumulate long
   (SADALP/UADALP).  Adjacent element pairs of `b` are summed into a
   double-width element and added to the accumulator `a`, which is
   tied to the output with the "0" matching constraint so only `b`
   appears as operand %2 in the template.  */
8399 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8400 vpadal_s8 (int16x4_t a, int8x8_t b)
8402 int16x4_t result;
8403 __asm__ ("sadalp %0.4h,%2.8b"
8404 : "=w"(result)
8405 : "0"(a), "w"(b)
8406 : /* No clobbers */);
8407 return result;
8410 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8411 vpadal_s16 (int32x2_t a, int16x4_t b)
8413 int32x2_t result;
8414 __asm__ ("sadalp %0.2s,%2.4h"
8415 : "=w"(result)
8416 : "0"(a), "w"(b)
8417 : /* No clobbers */);
8418 return result;
8421 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8422 vpadal_s32 (int64x1_t a, int32x2_t b)
8424 int64x1_t result;
8425 __asm__ ("sadalp %0.1d,%2.2s"
8426 : "=w"(result)
8427 : "0"(a), "w"(b)
8428 : /* No clobbers */);
8429 return result;
8432 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8433 vpadal_u8 (uint16x4_t a, uint8x8_t b)
8435 uint16x4_t result;
8436 __asm__ ("uadalp %0.4h,%2.8b"
8437 : "=w"(result)
8438 : "0"(a), "w"(b)
8439 : /* No clobbers */);
8440 return result;
8443 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8444 vpadal_u16 (uint32x2_t a, uint16x4_t b)
8446 uint32x2_t result;
8447 __asm__ ("uadalp %0.2s,%2.4h"
8448 : "=w"(result)
8449 : "0"(a), "w"(b)
8450 : /* No clobbers */);
8451 return result;
8454 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8455 vpadal_u32 (uint64x1_t a, uint32x2_t b)
8457 uint64x1_t result;
8458 __asm__ ("uadalp %0.1d,%2.2s"
8459 : "=w"(result)
8460 : "0"(a), "w"(b)
8461 : /* No clobbers */);
8462 return result;
/* 128-bit variants.  */
8465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8466 vpadalq_s8 (int16x8_t a, int8x16_t b)
8468 int16x8_t result;
8469 __asm__ ("sadalp %0.8h,%2.16b"
8470 : "=w"(result)
8471 : "0"(a), "w"(b)
8472 : /* No clobbers */);
8473 return result;
8476 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8477 vpadalq_s16 (int32x4_t a, int16x8_t b)
8479 int32x4_t result;
8480 __asm__ ("sadalp %0.4s,%2.8h"
8481 : "=w"(result)
8482 : "0"(a), "w"(b)
8483 : /* No clobbers */);
8484 return result;
8487 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8488 vpadalq_s32 (int64x2_t a, int32x4_t b)
8490 int64x2_t result;
8491 __asm__ ("sadalp %0.2d,%2.4s"
8492 : "=w"(result)
8493 : "0"(a), "w"(b)
8494 : /* No clobbers */);
8495 return result;
8498 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8499 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
8501 uint16x8_t result;
8502 __asm__ ("uadalp %0.8h,%2.16b"
8503 : "=w"(result)
8504 : "0"(a), "w"(b)
8505 : /* No clobbers */);
8506 return result;
8509 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8510 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
8512 uint32x4_t result;
8513 __asm__ ("uadalp %0.4s,%2.8h"
8514 : "=w"(result)
8515 : "0"(a), "w"(b)
8516 : /* No clobbers */);
8517 return result;
8520 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8521 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
8523 uint64x2_t result;
8524 __asm__ ("uadalp %0.2d,%2.4s"
8525 : "=w"(result)
8526 : "0"(a), "w"(b)
8527 : /* No clobbers */);
8528 return result;
/* vpadd_f32 -- floating-point pairwise add (FADDP) of the concatenated
   pairs of A and B, producing a 2-lane result.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpadd_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("faddp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpaddl_* -- pairwise add long (64-bit vector forms).
   Adjacent lanes of A are pairwise-added, each sum widened to the next
   element size.  Implemented with SADDLP (signed) / UADDLP (unsigned).  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpaddl_s8 (int8x8_t a)
{
  int16x4_t result;
  __asm__ ("saddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpaddl_s16 (int16x4_t a)
{
  int32x2_t result;
  __asm__ ("saddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vpaddl_s32 (int32x2_t a)
{
  int64x1_t result;
  __asm__ ("saddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpaddl_u8 (uint8x8_t a)
{
  uint16x4_t result;
  __asm__ ("uaddlp %0.4h,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpaddl_u16 (uint16x4_t a)
{
  uint32x2_t result;
  __asm__ ("uaddlp %0.2s,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vpaddl_u32 (uint32x2_t a)
{
  uint64x1_t result;
  __asm__ ("uaddlp %0.1d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddlq_* -- pairwise add long (128-bit vector forms).
   Same SADDLP/UADDLP operation as vpaddl_*, applied to full Q registers.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddlq_s8 (int8x16_t a)
{
  int16x8_t result;
  __asm__ ("saddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddlq_s16 (int16x8_t a)
{
  int32x4_t result;
  __asm__ ("saddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddlq_s32 (int32x4_t a)
{
  int64x2_t result;
  __asm__ ("saddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddlq_u8 (uint8x16_t a)
{
  uint16x8_t result;
  __asm__ ("uaddlp %0.8h,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddlq_u16 (uint16x8_t a)
{
  uint32x4_t result;
  __asm__ ("uaddlp %0.4s,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddlq_u32 (uint32x4_t a)
{
  uint64x2_t result;
  __asm__ ("uaddlp %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpaddq_* -- pairwise add across the concatenation of A and B
   (128-bit forms).  Implemented with FADDP for float/double lanes and
   ADDP for integer lanes; ADDP is sign-agnostic, so the signed and
   unsigned variants emit the same instruction.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpaddq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("faddp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpaddq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("faddp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpaddq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpaddq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpaddq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vpaddq_s64 (int64x2_t a, int64x2_t b)
{
  int64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpaddq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("addp %0.16b,%1.16b,%2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpaddq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("addp %0.8h,%1.8h,%2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpaddq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("addp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vpaddq_u64 (uint64x2_t a, uint64x2_t b)
{
  uint64x2_t result;
  __asm__ ("addp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpadds_f32 -- scalar pairwise add: FADDP reduces the two lanes of A
   to a single float.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpadds_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("faddp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmax_* -- pairwise maximum (64-bit vector forms).
   Takes the maximum of each adjacent lane pair across the concatenation
   of A and B.  FMAXP for float lanes, SMAXP/UMAXP for signed/unsigned
   integer lanes.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmax_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmax_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmax_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmax_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmax_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmax_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmax_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpmaxnm* -- pairwise maxNum (FMAXNMP): IEEE 754-2008 maxNum semantics
   for NaN handling.  Vector forms reduce adjacent lane pairs; the `qd'
   and `s' forms reduce a whole vector to one scalar.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmaxnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmaxq_* -- pairwise maximum (128-bit vector forms).
   Same operation as vpmax_* on full Q registers: FMAXP for float,
   SMAXP/UMAXP for signed/unsigned integer lanes.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpmaxq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpmaxq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpmaxq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpmaxq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpmaxq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Scalar pairwise-maximum reductions: FMAXP folds the two lanes of A
   into a single scalar result.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpmaxqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fmaxp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmaxs_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fmaxp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpmin_* -- pairwise minimum (64-bit vector forms).
   Mirror of vpmax_*: FMINP for float lanes, SMINP/UMINP for
   signed/unsigned integer lanes.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpmin_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vpmin_s8 (int8x8_t a, int8x8_t b)
{
  int8x8_t result;
  __asm__ ("sminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vpmin_s16 (int16x4_t a, int16x4_t b)
{
  int16x4_t result;
  __asm__ ("sminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vpmin_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("sminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vpmin_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uminp %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vpmin_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uminp %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vpmin_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uminp %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vpminnm* -- pairwise minNum (FMINNMP): IEEE 754-2008 minNum semantics
   for NaN handling.  Vector forms reduce adjacent lane pairs; the `qd'
   and `s' forms reduce a whole vector to one scalar.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vpminnm_f32 (float32x2_t a, float32x2_t b)
{
  float32x2_t result;
  __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminnmq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminnmq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminnmqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminnmp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpminnms_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminnmp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vpminq_* -- pairwise minimum (128-bit vector forms).
   Same operation as vpmin_* on full Q registers: FMINP for float,
   SMINP/UMINP for signed/unsigned integer lanes.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vpminq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vpminq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fminp %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vpminq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vpminq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vpminq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vpminq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uminp %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vpminq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uminp %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vpminq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uminp %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Scalar pairwise-minimum reductions: FMINP folds the two lanes of A
   into a single scalar result.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vpminqd_f64 (float64x2_t a)
{
  float64_t result;
  __asm__ ("fminp %d0,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vpmins_f32 (float32x2_t a)
{
  float32_t result;
  __asm__ ("fminp %s0,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vqdmulh_n_* / vqdmulhq_n_* -- saturating doubling multiply-high by a
   scalar, via SQDMULH with lane-0 of the scalar operand.  Note the "x"
   constraint on the 16-bit scalar: the `.h[lane]' indexed form can only
   address registers V0-V15, whereas the `.s[lane]' form may use any
   V register ("w").  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqmovn_high_* -- saturating extract-narrow into the high half.
   A becomes the low half of the result; SQXTN2/UQXTN2 then narrows B
   with saturation into the high half.  The "+w" constraint makes
   RESULT a read-write operand so the pre-seeded low half survives.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqmovn_high_s16 (int8x8_t a, int16x8_t b)
{
  int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqmovn_high_s32 (int16x4_t a, int32x4_t b)
{
  int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqmovn_high_s64 (int32x2_t a, int64x2_t b)
{
  int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("uqxtn2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqmovun_high_* -- saturating signed-to-unsigned extract-narrow into
   the high half (SQXTUN2).  Same low-half-seeding scheme as
   vqmovn_high_*.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
{
  uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.16b, %1.8h"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
{
  uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.8h, %1.4s"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
{
  uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
  __asm__ ("sqxtun2 %0.4s, %1.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqrdmulh_n_* / vqrdmulhq_n_* -- saturating rounding doubling
   multiply-high by a scalar (SQRDMULH, lane-0 of the scalar operand).
   As with vqdmulh_n_*, the 16-bit scalar uses the "x" constraint
   because the `.h[lane]' form is restricted to V0-V15.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_n_s16 (int16x4_t a, int16_t b)
{
  int16x4_t result;
  __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_n_s32 (int32x2_t a, int32_t b)
{
  int32x2_t result;
  __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
{
  int16x8_t result;
  __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
           : "=w"(result)
           : "w"(a), "x"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
{
  int32x4_t result;
  __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* vqrshrn_high_n_* -- saturating rounding shift-right-narrow into the
   high half (SQRSHRN2/UQRSHRN2).  Must be macros: the shift count C is
   encoded as an immediate ("i" constraint), so it has to be a
   compile-time constant.  A seeds the low half of RESULT; "+w" keeps it
   live across the asm.  */

#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqrshrun_high_n_* -- saturating rounding signed-to-unsigned
   shift-right-narrow into the high half (SQRSHRUN2).  Macro form so the
   shift count C can be passed as an "i" immediate.  */

#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqshrn_high_n_* -- saturating (non-rounding) shift-right-narrow into
   the high half (SQSHRN2/UQSHRN2).  Macro form for the "i" immediate
   shift count; A seeds the low half of RESULT.  */

#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vqshrun_high_n_* -- saturating signed-to-unsigned shift-right-narrow
   into the high half (SQSHRUN2).  Macro form for the "i" immediate
   shift count.  */

#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vrshrn_high_n_* -- rounding shift-right-narrow into the high half
   (RSHRN2, sign-agnostic so signed and unsigned variants share the
   mnemonic).  Macro form for the "i" immediate shift count; A seeds
   the low half of RESULT.  */

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vrshrn_n_{s16,s32,s64,u16,u32,u64}: RSHRN -- rounding shift right narrow
   (full-width input, half-width result).  Plain write-only output ("=w");
   shift 'b' is an asm immediate ("i").  */
9826 #define vrshrn_n_s16(a, b) \
9827 __extension__ \
9828 ({ \
9829 int16x8_t a_ = (a); \
9830 int8x8_t result; \
9831 __asm__ ("rshrn %0.8b,%1.8h,%2" \
9832 : "=w"(result) \
9833 : "w"(a_), "i"(b) \
9834 : /* No clobbers */); \
9835 result; \
9838 #define vrshrn_n_s32(a, b) \
9839 __extension__ \
9840 ({ \
9841 int32x4_t a_ = (a); \
9842 int16x4_t result; \
9843 __asm__ ("rshrn %0.4h,%1.4s,%2" \
9844 : "=w"(result) \
9845 : "w"(a_), "i"(b) \
9846 : /* No clobbers */); \
9847 result; \
9850 #define vrshrn_n_s64(a, b) \
9851 __extension__ \
9852 ({ \
9853 int64x2_t a_ = (a); \
9854 int32x2_t result; \
9855 __asm__ ("rshrn %0.2s,%1.2d,%2" \
9856 : "=w"(result) \
9857 : "w"(a_), "i"(b) \
9858 : /* No clobbers */); \
9859 result; \
9862 #define vrshrn_n_u16(a, b) \
9863 __extension__ \
9864 ({ \
9865 uint16x8_t a_ = (a); \
9866 uint8x8_t result; \
9867 __asm__ ("rshrn %0.8b,%1.8h,%2" \
9868 : "=w"(result) \
9869 : "w"(a_), "i"(b) \
9870 : /* No clobbers */); \
9871 result; \
9874 #define vrshrn_n_u32(a, b) \
9875 __extension__ \
9876 ({ \
9877 uint32x4_t a_ = (a); \
9878 uint16x4_t result; \
9879 __asm__ ("rshrn %0.4h,%1.4s,%2" \
9880 : "=w"(result) \
9881 : "w"(a_), "i"(b) \
9882 : /* No clobbers */); \
9883 result; \
9886 #define vrshrn_n_u64(a, b) \
9887 __extension__ \
9888 ({ \
9889 uint64x2_t a_ = (a); \
9890 uint32x2_t result; \
9891 __asm__ ("rshrn %0.2s,%1.2d,%2" \
9892 : "=w"(result) \
9893 : "w"(a_), "i"(b) \
9894 : /* No clobbers */); \
9895 result; \
/* Reciprocal square-root estimate intrinsics: FRSQRTE (float) and URSQRTE
   (unsigned fixed-point), in vector (2s/4s/2d), scalar-double (%d), and
   scalar-single (%s) register forms.  Each is a single-instruction inline
   asm wrapper with a write-only output.  */
9898 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9899 vrsqrte_f32 (float32x2_t a)
9901 float32x2_t result;
9902 __asm__ ("frsqrte %0.2s,%1.2s"
9903 : "=w"(result)
9904 : "w"(a)
9905 : /* No clobbers */);
9906 return result;
9909 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
9910 vrsqrte_f64 (float64x1_t a)
9912 float64x1_t result;
9913 __asm__ ("frsqrte %d0,%d1"
9914 : "=w"(result)
9915 : "w"(a)
9916 : /* No clobbers */);
9917 return result;
9920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9921 vrsqrte_u32 (uint32x2_t a)
9923 uint32x2_t result;
9924 __asm__ ("ursqrte %0.2s,%1.2s"
9925 : "=w"(result)
9926 : "w"(a)
9927 : /* No clobbers */);
9928 return result;
9931 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9932 vrsqrted_f64 (float64_t a)
9934 float64_t result;
9935 __asm__ ("frsqrte %d0,%d1"
9936 : "=w"(result)
9937 : "w"(a)
9938 : /* No clobbers */);
9939 return result;
9942 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9943 vrsqrteq_f32 (float32x4_t a)
9945 float32x4_t result;
9946 __asm__ ("frsqrte %0.4s,%1.4s"
9947 : "=w"(result)
9948 : "w"(a)
9949 : /* No clobbers */);
9950 return result;
9953 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9954 vrsqrteq_f64 (float64x2_t a)
9956 float64x2_t result;
9957 __asm__ ("frsqrte %0.2d,%1.2d"
9958 : "=w"(result)
9959 : "w"(a)
9960 : /* No clobbers */);
9961 return result;
9964 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9965 vrsqrteq_u32 (uint32x4_t a)
9967 uint32x4_t result;
9968 __asm__ ("ursqrte %0.4s,%1.4s"
9969 : "=w"(result)
9970 : "w"(a)
9971 : /* No clobbers */);
9972 return result;
9975 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9976 vrsqrtes_f32 (float32_t a)
9978 float32_t result;
9979 __asm__ ("frsqrte %s0,%s1"
9980 : "=w"(result)
9981 : "w"(a)
9982 : /* No clobbers */);
9983 return result;
/* Reciprocal square-root STEP intrinsics: FRSQRTS, the Newton-Raphson
   refinement companion to FRSQRTE above.  Two-operand inline-asm wrappers
   in vector and scalar register forms.  */
9986 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9987 vrsqrts_f32 (float32x2_t a, float32x2_t b)
9989 float32x2_t result;
9990 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
9991 : "=w"(result)
9992 : "w"(a), "w"(b)
9993 : /* No clobbers */);
9994 return result;
9997 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9998 vrsqrtsd_f64 (float64_t a, float64_t b)
10000 float64_t result;
10001 __asm__ ("frsqrts %d0,%d1,%d2"
10002 : "=w"(result)
10003 : "w"(a), "w"(b)
10004 : /* No clobbers */);
10005 return result;
10008 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10009 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
10011 float32x4_t result;
10012 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
10013 : "=w"(result)
10014 : "w"(a), "w"(b)
10015 : /* No clobbers */);
10016 return result;
10019 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10020 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
10022 float64x2_t result;
10023 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
10024 : "=w"(result)
10025 : "w"(a), "w"(b)
10026 : /* No clobbers */);
10027 return result;
10030 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10031 vrsqrtss_f32 (float32_t a, float32_t b)
10033 float32_t result;
10034 __asm__ ("frsqrts %s0,%s1,%s2"
10035 : "=w"(result)
10036 : "w"(a), "w"(b)
10037 : /* No clobbers */);
10038 return result;
/* vshrn_high_n_{s16,s32,s64,u16,u32,u64}: SHRN2 -- (non-rounding) shift
   right narrow into the HIGH half, low half taken from 'a'.  Same
   combine-with-zero then "+w" read-write asm pattern as the other *_high_n
   families above.  */
10041 #define vshrn_high_n_s16(a, b, c) \
10042 __extension__ \
10043 ({ \
10044 int16x8_t b_ = (b); \
10045 int8x8_t a_ = (a); \
10046 int8x16_t result = vcombine_s8 \
10047 (a_, vcreate_s8 \
10048 (__AARCH64_UINT64_C (0x0))); \
10049 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
10050 : "+w"(result) \
10051 : "w"(b_), "i"(c) \
10052 : /* No clobbers */); \
10053 result; \
10056 #define vshrn_high_n_s32(a, b, c) \
10057 __extension__ \
10058 ({ \
10059 int32x4_t b_ = (b); \
10060 int16x4_t a_ = (a); \
10061 int16x8_t result = vcombine_s16 \
10062 (a_, vcreate_s16 \
10063 (__AARCH64_UINT64_C (0x0))); \
10064 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
10065 : "+w"(result) \
10066 : "w"(b_), "i"(c) \
10067 : /* No clobbers */); \
10068 result; \
10071 #define vshrn_high_n_s64(a, b, c) \
10072 __extension__ \
10073 ({ \
10074 int64x2_t b_ = (b); \
10075 int32x2_t a_ = (a); \
10076 int32x4_t result = vcombine_s32 \
10077 (a_, vcreate_s32 \
10078 (__AARCH64_UINT64_C (0x0))); \
10079 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
10080 : "+w"(result) \
10081 : "w"(b_), "i"(c) \
10082 : /* No clobbers */); \
10083 result; \
10086 #define vshrn_high_n_u16(a, b, c) \
10087 __extension__ \
10088 ({ \
10089 uint16x8_t b_ = (b); \
10090 uint8x8_t a_ = (a); \
10091 uint8x16_t result = vcombine_u8 \
10092 (a_, vcreate_u8 \
10093 (__AARCH64_UINT64_C (0x0))); \
10094 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
10095 : "+w"(result) \
10096 : "w"(b_), "i"(c) \
10097 : /* No clobbers */); \
10098 result; \
10101 #define vshrn_high_n_u32(a, b, c) \
10102 __extension__ \
10103 ({ \
10104 uint32x4_t b_ = (b); \
10105 uint16x4_t a_ = (a); \
10106 uint16x8_t result = vcombine_u16 \
10107 (a_, vcreate_u16 \
10108 (__AARCH64_UINT64_C (0x0))); \
10109 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
10110 : "+w"(result) \
10111 : "w"(b_), "i"(c) \
10112 : /* No clobbers */); \
10113 result; \
10116 #define vshrn_high_n_u64(a, b, c) \
10117 __extension__ \
10118 ({ \
10119 uint64x2_t b_ = (b); \
10120 uint32x2_t a_ = (a); \
10121 uint32x4_t result = vcombine_u32 \
10122 (a_, vcreate_u32 \
10123 (__AARCH64_UINT64_C (0x0))); \
10124 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
10125 : "+w"(result) \
10126 : "w"(b_), "i"(c) \
10127 : /* No clobbers */); \
10128 result; \
/* vshrn_n_{s16,s32,s64,u16,u32,u64}: SHRN -- shift right narrow
   (full-width input, half-width result).  Write-only output; shift 'b' is
   an asm immediate ("i").  */
10131 #define vshrn_n_s16(a, b) \
10132 __extension__ \
10133 ({ \
10134 int16x8_t a_ = (a); \
10135 int8x8_t result; \
10136 __asm__ ("shrn %0.8b,%1.8h,%2" \
10137 : "=w"(result) \
10138 : "w"(a_), "i"(b) \
10139 : /* No clobbers */); \
10140 result; \
10143 #define vshrn_n_s32(a, b) \
10144 __extension__ \
10145 ({ \
10146 int32x4_t a_ = (a); \
10147 int16x4_t result; \
10148 __asm__ ("shrn %0.4h,%1.4s,%2" \
10149 : "=w"(result) \
10150 : "w"(a_), "i"(b) \
10151 : /* No clobbers */); \
10152 result; \
10155 #define vshrn_n_s64(a, b) \
10156 __extension__ \
10157 ({ \
10158 int64x2_t a_ = (a); \
10159 int32x2_t result; \
10160 __asm__ ("shrn %0.2s,%1.2d,%2" \
10161 : "=w"(result) \
10162 : "w"(a_), "i"(b) \
10163 : /* No clobbers */); \
10164 result; \
10167 #define vshrn_n_u16(a, b) \
10168 __extension__ \
10169 ({ \
10170 uint16x8_t a_ = (a); \
10171 uint8x8_t result; \
10172 __asm__ ("shrn %0.8b,%1.8h,%2" \
10173 : "=w"(result) \
10174 : "w"(a_), "i"(b) \
10175 : /* No clobbers */); \
10176 result; \
10179 #define vshrn_n_u32(a, b) \
10180 __extension__ \
10181 ({ \
10182 uint32x4_t a_ = (a); \
10183 uint16x4_t result; \
10184 __asm__ ("shrn %0.4h,%1.4s,%2" \
10185 : "=w"(result) \
10186 : "w"(a_), "i"(b) \
10187 : /* No clobbers */); \
10188 result; \
10191 #define vshrn_n_u64(a, b) \
10192 __extension__ \
10193 ({ \
10194 uint64x2_t a_ = (a); \
10195 uint32x2_t result; \
10196 __asm__ ("shrn %0.2s,%1.2d,%2" \
10197 : "=w"(result) \
10198 : "w"(a_), "i"(b) \
10199 : /* No clobbers */); \
10200 result; \
/* vsli(q)_n_{p8,p16}: SLI -- shift left and insert, polynomial variants.
   The accumulator 'a' is tied to the output via the "0" matching
   constraint; 'b' is shifted left by immediate 'c' and inserted.  */
10203 #define vsli_n_p8(a, b, c) \
10204 __extension__ \
10205 ({ \
10206 poly8x8_t b_ = (b); \
10207 poly8x8_t a_ = (a); \
10208 poly8x8_t result; \
10209 __asm__ ("sli %0.8b,%2.8b,%3" \
10210 : "=w"(result) \
10211 : "0"(a_), "w"(b_), "i"(c) \
10212 : /* No clobbers */); \
10213 result; \
10216 #define vsli_n_p16(a, b, c) \
10217 __extension__ \
10218 ({ \
10219 poly16x4_t b_ = (b); \
10220 poly16x4_t a_ = (a); \
10221 poly16x4_t result; \
10222 __asm__ ("sli %0.4h,%2.4h,%3" \
10223 : "=w"(result) \
10224 : "0"(a_), "w"(b_), "i"(c) \
10225 : /* No clobbers */); \
10226 result; \
10229 #define vsliq_n_p8(a, b, c) \
10230 __extension__ \
10231 ({ \
10232 poly8x16_t b_ = (b); \
10233 poly8x16_t a_ = (a); \
10234 poly8x16_t result; \
10235 __asm__ ("sli %0.16b,%2.16b,%3" \
10236 : "=w"(result) \
10237 : "0"(a_), "w"(b_), "i"(c) \
10238 : /* No clobbers */); \
10239 result; \
10242 #define vsliq_n_p16(a, b, c) \
10243 __extension__ \
10244 ({ \
10245 poly16x8_t b_ = (b); \
10246 poly16x8_t a_ = (a); \
10247 poly16x8_t result; \
10248 __asm__ ("sli %0.8h,%2.8h,%3" \
10249 : "=w"(result) \
10250 : "0"(a_), "w"(b_), "i"(c) \
10251 : /* No clobbers */); \
10252 result; \
/* vsri(q)_n_{p8,p16}: SRI -- shift right and insert, polynomial variants.
   Mirrors the SLI macros above; 'a' is tied to the output ("0") and 'b'
   is shifted right by immediate 'c' then inserted.  */
10255 #define vsri_n_p8(a, b, c) \
10256 __extension__ \
10257 ({ \
10258 poly8x8_t b_ = (b); \
10259 poly8x8_t a_ = (a); \
10260 poly8x8_t result; \
10261 __asm__ ("sri %0.8b,%2.8b,%3" \
10262 : "=w"(result) \
10263 : "0"(a_), "w"(b_), "i"(c) \
10264 : /* No clobbers */); \
10265 result; \
10268 #define vsri_n_p16(a, b, c) \
10269 __extension__ \
10270 ({ \
10271 poly16x4_t b_ = (b); \
10272 poly16x4_t a_ = (a); \
10273 poly16x4_t result; \
10274 __asm__ ("sri %0.4h,%2.4h,%3" \
10275 : "=w"(result) \
10276 : "0"(a_), "w"(b_), "i"(c) \
10277 : /* No clobbers */); \
10278 result; \
10281 #define vsriq_n_p8(a, b, c) \
10282 __extension__ \
10283 ({ \
10284 poly8x16_t b_ = (b); \
10285 poly8x16_t a_ = (a); \
10286 poly8x16_t result; \
10287 __asm__ ("sri %0.16b,%2.16b,%3" \
10288 : "=w"(result) \
10289 : "0"(a_), "w"(b_), "i"(c) \
10290 : /* No clobbers */); \
10291 result; \
10294 #define vsriq_n_p16(a, b, c) \
10295 __extension__ \
10296 ({ \
10297 poly16x8_t b_ = (b); \
10298 poly16x8_t a_ = (a); \
10299 poly16x8_t result; \
10300 __asm__ ("sri %0.8h,%2.8h,%3" \
10301 : "=w"(result) \
10302 : "0"(a_), "w"(b_), "i"(c) \
10303 : /* No clobbers */); \
10304 result; \
/* vst1_lane_* (64-bit vector forms): ST1 single-lane store -- write lane
   'c' of vector 'b' to memory at 'a'.  Pure store: no outputs, "memory"
   clobber so the compiler does not reorder or elide the write.
   NOTE(review): lane index 'c' is only constrained to be a compile-time
   constant ("i"); no bounds check happens here.  */
10307 #define vst1_lane_f32(a, b, c) \
10308 __extension__ \
10309 ({ \
10310 float32x2_t b_ = (b); \
10311 float32_t * a_ = (a); \
10312 __asm__ ("st1 {%1.s}[%2],[%0]" \
10314 : "r"(a_), "w"(b_), "i"(c) \
10315 : "memory"); \
10318 #define vst1_lane_f64(a, b, c) \
10319 __extension__ \
10320 ({ \
10321 float64x1_t b_ = (b); \
10322 float64_t * a_ = (a); \
10323 __asm__ ("st1 {%1.d}[%2],[%0]" \
10325 : "r"(a_), "w"(b_), "i"(c) \
10326 : "memory"); \
10329 #define vst1_lane_p8(a, b, c) \
10330 __extension__ \
10331 ({ \
10332 poly8x8_t b_ = (b); \
10333 poly8_t * a_ = (a); \
10334 __asm__ ("st1 {%1.b}[%2],[%0]" \
10336 : "r"(a_), "w"(b_), "i"(c) \
10337 : "memory"); \
10340 #define vst1_lane_p16(a, b, c) \
10341 __extension__ \
10342 ({ \
10343 poly16x4_t b_ = (b); \
10344 poly16_t * a_ = (a); \
10345 __asm__ ("st1 {%1.h}[%2],[%0]" \
10347 : "r"(a_), "w"(b_), "i"(c) \
10348 : "memory"); \
10351 #define vst1_lane_s8(a, b, c) \
10352 __extension__ \
10353 ({ \
10354 int8x8_t b_ = (b); \
10355 int8_t * a_ = (a); \
10356 __asm__ ("st1 {%1.b}[%2],[%0]" \
10358 : "r"(a_), "w"(b_), "i"(c) \
10359 : "memory"); \
10362 #define vst1_lane_s16(a, b, c) \
10363 __extension__ \
10364 ({ \
10365 int16x4_t b_ = (b); \
10366 int16_t * a_ = (a); \
10367 __asm__ ("st1 {%1.h}[%2],[%0]" \
10369 : "r"(a_), "w"(b_), "i"(c) \
10370 : "memory"); \
10373 #define vst1_lane_s32(a, b, c) \
10374 __extension__ \
10375 ({ \
10376 int32x2_t b_ = (b); \
10377 int32_t * a_ = (a); \
10378 __asm__ ("st1 {%1.s}[%2],[%0]" \
10380 : "r"(a_), "w"(b_), "i"(c) \
10381 : "memory"); \
10384 #define vst1_lane_s64(a, b, c) \
10385 __extension__ \
10386 ({ \
10387 int64x1_t b_ = (b); \
10388 int64_t * a_ = (a); \
10389 __asm__ ("st1 {%1.d}[%2],[%0]" \
10391 : "r"(a_), "w"(b_), "i"(c) \
10392 : "memory"); \
10395 #define vst1_lane_u8(a, b, c) \
10396 __extension__ \
10397 ({ \
10398 uint8x8_t b_ = (b); \
10399 uint8_t * a_ = (a); \
10400 __asm__ ("st1 {%1.b}[%2],[%0]" \
10402 : "r"(a_), "w"(b_), "i"(c) \
10403 : "memory"); \
10406 #define vst1_lane_u16(a, b, c) \
10407 __extension__ \
10408 ({ \
10409 uint16x4_t b_ = (b); \
10410 uint16_t * a_ = (a); \
10411 __asm__ ("st1 {%1.h}[%2],[%0]" \
10413 : "r"(a_), "w"(b_), "i"(c) \
10414 : "memory"); \
10417 #define vst1_lane_u32(a, b, c) \
10418 __extension__ \
10419 ({ \
10420 uint32x2_t b_ = (b); \
10421 uint32_t * a_ = (a); \
10422 __asm__ ("st1 {%1.s}[%2],[%0]" \
10424 : "r"(a_), "w"(b_), "i"(c) \
10425 : "memory"); \
10428 #define vst1_lane_u64(a, b, c) \
10429 __extension__ \
10430 ({ \
10431 uint64x1_t b_ = (b); \
10432 uint64_t * a_ = (a); \
10433 __asm__ ("st1 {%1.d}[%2],[%0]" \
10435 : "r"(a_), "w"(b_), "i"(c) \
10436 : "memory"); \
/* vst1q_lane_* (128-bit vector forms): ST1 single-lane store from a
   q-register vector.  Same pattern and caveats as the vst1_lane_* family
   above ("memory" clobber, immediate-only lane index).  */
10440 #define vst1q_lane_f32(a, b, c) \
10441 __extension__ \
10442 ({ \
10443 float32x4_t b_ = (b); \
10444 float32_t * a_ = (a); \
10445 __asm__ ("st1 {%1.s}[%2],[%0]" \
10447 : "r"(a_), "w"(b_), "i"(c) \
10448 : "memory"); \
10451 #define vst1q_lane_f64(a, b, c) \
10452 __extension__ \
10453 ({ \
10454 float64x2_t b_ = (b); \
10455 float64_t * a_ = (a); \
10456 __asm__ ("st1 {%1.d}[%2],[%0]" \
10458 : "r"(a_), "w"(b_), "i"(c) \
10459 : "memory"); \
10462 #define vst1q_lane_p8(a, b, c) \
10463 __extension__ \
10464 ({ \
10465 poly8x16_t b_ = (b); \
10466 poly8_t * a_ = (a); \
10467 __asm__ ("st1 {%1.b}[%2],[%0]" \
10469 : "r"(a_), "w"(b_), "i"(c) \
10470 : "memory"); \
10473 #define vst1q_lane_p16(a, b, c) \
10474 __extension__ \
10475 ({ \
10476 poly16x8_t b_ = (b); \
10477 poly16_t * a_ = (a); \
10478 __asm__ ("st1 {%1.h}[%2],[%0]" \
10480 : "r"(a_), "w"(b_), "i"(c) \
10481 : "memory"); \
10484 #define vst1q_lane_s8(a, b, c) \
10485 __extension__ \
10486 ({ \
10487 int8x16_t b_ = (b); \
10488 int8_t * a_ = (a); \
10489 __asm__ ("st1 {%1.b}[%2],[%0]" \
10491 : "r"(a_), "w"(b_), "i"(c) \
10492 : "memory"); \
10495 #define vst1q_lane_s16(a, b, c) \
10496 __extension__ \
10497 ({ \
10498 int16x8_t b_ = (b); \
10499 int16_t * a_ = (a); \
10500 __asm__ ("st1 {%1.h}[%2],[%0]" \
10502 : "r"(a_), "w"(b_), "i"(c) \
10503 : "memory"); \
10506 #define vst1q_lane_s32(a, b, c) \
10507 __extension__ \
10508 ({ \
10509 int32x4_t b_ = (b); \
10510 int32_t * a_ = (a); \
10511 __asm__ ("st1 {%1.s}[%2],[%0]" \
10513 : "r"(a_), "w"(b_), "i"(c) \
10514 : "memory"); \
10517 #define vst1q_lane_s64(a, b, c) \
10518 __extension__ \
10519 ({ \
10520 int64x2_t b_ = (b); \
10521 int64_t * a_ = (a); \
10522 __asm__ ("st1 {%1.d}[%2],[%0]" \
10524 : "r"(a_), "w"(b_), "i"(c) \
10525 : "memory"); \
10528 #define vst1q_lane_u8(a, b, c) \
10529 __extension__ \
10530 ({ \
10531 uint8x16_t b_ = (b); \
10532 uint8_t * a_ = (a); \
10533 __asm__ ("st1 {%1.b}[%2],[%0]" \
10535 : "r"(a_), "w"(b_), "i"(c) \
10536 : "memory"); \
10539 #define vst1q_lane_u16(a, b, c) \
10540 __extension__ \
10541 ({ \
10542 uint16x8_t b_ = (b); \
10543 uint16_t * a_ = (a); \
10544 __asm__ ("st1 {%1.h}[%2],[%0]" \
10546 : "r"(a_), "w"(b_), "i"(c) \
10547 : "memory"); \
10550 #define vst1q_lane_u32(a, b, c) \
10551 __extension__ \
10552 ({ \
10553 uint32x4_t b_ = (b); \
10554 uint32_t * a_ = (a); \
10555 __asm__ ("st1 {%1.s}[%2],[%0]" \
10557 : "r"(a_), "w"(b_), "i"(c) \
10558 : "memory"); \
10561 #define vst1q_lane_u64(a, b, c) \
10562 __extension__ \
10563 ({ \
10564 uint64x2_t b_ = (b); \
10565 uint64_t * a_ = (a); \
10566 __asm__ ("st1 {%1.d}[%2],[%0]" \
10568 : "r"(a_), "w"(b_), "i"(c) \
10569 : "memory"); \
/* vtst(q)_{p8,p16}: CMTST -- per-lane "test bits nonzero" compare for
   polynomial vectors; each result lane is all-ones if (a & b) lane != 0,
   else zero.  Single-instruction asm wrappers.  */
10573 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10574 vtst_p8 (poly8x8_t a, poly8x8_t b)
10576 uint8x8_t result;
10577 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
10578 : "=w"(result)
10579 : "w"(a), "w"(b)
10580 : /* No clobbers */);
10581 return result;
10584 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10585 vtst_p16 (poly16x4_t a, poly16x4_t b)
10587 uint16x4_t result;
10588 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
10589 : "=w"(result)
10590 : "w"(a), "w"(b)
10591 : /* No clobbers */);
10592 return result;
10595 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10596 vtstq_p8 (poly8x16_t a, poly8x16_t b)
10598 uint8x16_t result;
10599 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
10600 : "=w"(result)
10601 : "w"(a), "w"(b)
10602 : /* No clobbers */);
10603 return result;
10606 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10607 vtstq_p16 (poly16x8_t a, poly16x8_t b)
10609 uint16x8_t result;
10610 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
10611 : "=w"(result)
10612 : "w"(a), "w"(b)
10613 : /* No clobbers */);
10614 return result;
10617 /* End of temporary inline asm implementations. */
10619 /* Start of temporary inline asm for vldn, vstn and friends. */
10621 /* Create struct element types for duplicating loads.
10623 Create 2 element structures of:
10625 +------+----+----+----+----+
10626 | | 8 | 16 | 32 | 64 |
10627 +------+----+----+----+----+
10628 |int | Y | Y | N | N |
10629 +------+----+----+----+----+
10630 |uint | Y | Y | N | N |
10631 +------+----+----+----+----+
10632 |float | - | - | N | N |
10633 +------+----+----+----+----+
10634 |poly | Y | Y | - | - |
10635 +------+----+----+----+----+
10637 Create 3 element structures of:
10639 +------+----+----+----+----+
10640 | | 8 | 16 | 32 | 64 |
10641 +------+----+----+----+----+
10642 |int | Y | Y | Y | Y |
10643 +------+----+----+----+----+
10644 |uint | Y | Y | Y | Y |
10645 +------+----+----+----+----+
10646 |float | - | - | Y | Y |
10647 +------+----+----+----+----+
10648 |poly | Y | Y | - | - |
10649 +------+----+----+----+----+
10651 Create 4 element structures of:
10653 +------+----+----+----+----+
10654 | | 8 | 16 | 32 | 64 |
10655 +------+----+----+----+----+
10656 |int | Y | N | N | Y |
10657 +------+----+----+----+----+
10658 |uint | Y | N | N | Y |
10659 +------+----+----+----+----+
10660 |float | - | - | N | Y |
10661 +------+----+----+----+----+
10662 |poly | Y | N | - | - |
10663 +------+----+----+----+----+
10665 These structure types are required for casting memory references. */
/* __STRUCTN(t, sz, nelem): token-paste a plain struct type
   t<sz>x<nelem>_t holding an array of <nelem> scalar t<sz>_t values.
   These are the element-struct shapes listed in the tables above, used
   solely for casting memory references in the vldN/vstN helpers; the
   macro is #undef'd immediately after the instantiations.  Only the
   combinations marked 'Y' in the tables are instantiated.  */
10666 #define __STRUCTN(t, sz, nelem) \
10667 typedef struct t ## sz ## x ## nelem ## _t { \
10668 t ## sz ## _t val[nelem]; \
10669 } t ## sz ## x ## nelem ## _t;
10671 /* 2-element structs. */
10672 __STRUCTN (int, 8, 2)
10673 __STRUCTN (int, 16, 2)
10674 __STRUCTN (uint, 8, 2)
10675 __STRUCTN (uint, 16, 2)
10676 __STRUCTN (poly, 8, 2)
10677 __STRUCTN (poly, 16, 2)
10678 /* 3-element structs. */
10679 __STRUCTN (int, 8, 3)
10680 __STRUCTN (int, 16, 3)
10681 __STRUCTN (int, 32, 3)
10682 __STRUCTN (int, 64, 3)
10683 __STRUCTN (uint, 8, 3)
10684 __STRUCTN (uint, 16, 3)
10685 __STRUCTN (uint, 32, 3)
10686 __STRUCTN (uint, 64, 3)
10687 __STRUCTN (float, 32, 3)
10688 __STRUCTN (float, 64, 3)
10689 __STRUCTN (poly, 8, 3)
10690 __STRUCTN (poly, 16, 3)
10691 /* 4-element structs. */
10692 __STRUCTN (int, 8, 4)
10693 __STRUCTN (int, 64, 4)
10694 __STRUCTN (uint, 8, 4)
10695 __STRUCTN (uint, 64, 4)
10696 __STRUCTN (poly, 8, 4)
10697 __STRUCTN (float, 64, 4)
10698 #undef __STRUCTN
/* __ST2_LANE_FUNC (64-bit variant): generate vst2_lane_<suffix>.  Each
   d-register element of the pair is widened to a q-register by combining
   with a zero upper half, packed into the OI (two-q-register) opaque mode
   via __builtin_aarch64_set_qregoi<mode>, and stored with the st2_lane
   builtin.  The lane index __c is passed through to the builtin, which is
   where range checking happens (not in this header).  */
10701 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
10702 mode, ptr_mode, funcsuffix, signedtype) \
10703 __extension__ static __inline void \
10704 __attribute__ ((__always_inline__)) \
10705 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
10706 intype __b, const int __c) \
10708 __builtin_aarch64_simd_oi __o; \
10709 largetype __temp; \
10710 __temp.val[0] \
10711 = vcombine_##funcsuffix (__b.val[0], \
10712 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10713 __temp.val[1] \
10714 = vcombine_##funcsuffix (__b.val[1], \
10715 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10716 __o = __builtin_aarch64_set_qregoi##mode (__o, \
10717 (signedtype) __temp.val[0], 0); \
10718 __o = __builtin_aarch64_set_qregoi##mode (__o, \
10719 (signedtype) __temp.val[1], 1); \
10720 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10721 __ptr, __o, __c); \
10724 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
10725 float32x4_t)
10726 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
10727 float64x2_t)
10728 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
10729 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
10730 int16x8_t)
10731 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
10732 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
10733 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
10734 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
10735 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
10736 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
10737 int16x8_t)
10738 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
10739 int32x4_t)
10740 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
10741 int64x2_t)
10744 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10745 __extension__ static __inline void \
10746 __attribute__ ((__always_inline__)) \
10747 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
10748 intype __b, const int __c) \
10750 union { intype __i; \
10751 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
10752 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10753 __ptr, __temp.__o, __c); \
10756 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
10757 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
10758 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
10759 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
10760 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
10761 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
10762 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
10763 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
10764 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
10765 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
10766 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
10767 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
/* __ST3_LANE_FUNC (64-bit variant): generate vst3_lane_<suffix>.  Same
   widen-then-pack scheme as the ST2 d-reg variant, but with three vectors
   packed into the CI (three-q-register) opaque mode.  */
10769 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
10770 mode, ptr_mode, funcsuffix, signedtype) \
10771 __extension__ static __inline void \
10772 __attribute__ ((__always_inline__)) \
10773 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
10774 intype __b, const int __c) \
10776 __builtin_aarch64_simd_ci __o; \
10777 largetype __temp; \
10778 __temp.val[0] \
10779 = vcombine_##funcsuffix (__b.val[0], \
10780 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10781 __temp.val[1] \
10782 = vcombine_##funcsuffix (__b.val[1], \
10783 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10784 __temp.val[2] \
10785 = vcombine_##funcsuffix (__b.val[2], \
10786 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10787 __o = __builtin_aarch64_set_qregci##mode (__o, \
10788 (signedtype) __temp.val[0], 0); \
10789 __o = __builtin_aarch64_set_qregci##mode (__o, \
10790 (signedtype) __temp.val[1], 1); \
10791 __o = __builtin_aarch64_set_qregci##mode (__o, \
10792 (signedtype) __temp.val[2], 2); \
10793 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10794 __ptr, __o, __c); \
10797 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
10798 float32x4_t)
10799 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
10800 float64x2_t)
10801 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
10802 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
10803 int16x8_t)
10804 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
10805 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
10806 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
10807 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
10808 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
10809 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
10810 int16x8_t)
10811 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
10812 int32x4_t)
10813 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
10814 int64x2_t)
/* __ST3_LANE_FUNC (128-bit variant): generate vst3q_lane_<suffix> by
   union-punning the three-q-register struct straight into CI mode, as in
   the ST2 q-reg variant.  */
10816 #undef __ST3_LANE_FUNC
10817 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10818 __extension__ static __inline void \
10819 __attribute__ ((__always_inline__)) \
10820 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
10821 intype __b, const int __c) \
10823 union { intype __i; \
10824 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
10825 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10826 __ptr, __temp.__o, __c); \
10829 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
10830 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
10831 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
10832 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
10833 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
10834 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
10835 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
10836 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
10837 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
10838 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
10839 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
10840 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
/* __ST4_LANE_FUNC (64-bit variant): generate vst4_lane_<suffix>.  Same
   widen-then-pack scheme as the ST2/ST3 d-reg variants, with four vectors
   packed into the XI (four-q-register) opaque mode.  */
10842 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
10843 mode, ptr_mode, funcsuffix, signedtype) \
10844 __extension__ static __inline void \
10845 __attribute__ ((__always_inline__)) \
10846 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
10847 intype __b, const int __c) \
10849 __builtin_aarch64_simd_xi __o; \
10850 largetype __temp; \
10851 __temp.val[0] \
10852 = vcombine_##funcsuffix (__b.val[0], \
10853 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10854 __temp.val[1] \
10855 = vcombine_##funcsuffix (__b.val[1], \
10856 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10857 __temp.val[2] \
10858 = vcombine_##funcsuffix (__b.val[2], \
10859 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10860 __temp.val[3] \
10861 = vcombine_##funcsuffix (__b.val[3], \
10862 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
10863 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10864 (signedtype) __temp.val[0], 0); \
10865 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10866 (signedtype) __temp.val[1], 1); \
10867 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10868 (signedtype) __temp.val[2], 2); \
10869 __o = __builtin_aarch64_set_qregxi##mode (__o, \
10870 (signedtype) __temp.val[3], 3); \
10871 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10872 __ptr, __o, __c); \
10875 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
10876 float32x4_t)
10877 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
10878 float64x2_t)
10879 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
10880 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
10881 int16x8_t)
10882 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
10883 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
10884 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
10885 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
10886 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
10887 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
10888 int16x8_t)
10889 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
10890 int32x4_t)
10891 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
10892 int64x2_t)
/* __ST4_LANE_FUNC (128-bit variant): generate vst4q_lane_<suffix> by
   union-punning the four-q-register struct straight into XI mode, as in
   the ST2/ST3 q-reg variants.  */
10894 #undef __ST4_LANE_FUNC
10895 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
10896 __extension__ static __inline void \
10897 __attribute__ ((__always_inline__)) \
10898 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
10899 intype __b, const int __c) \
10901 union { intype __i; \
10902 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
10903 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
10904 __ptr, __temp.__o, __c); \
10907 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
10908 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
10909 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
10910 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
10911 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
10912 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
10913 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
10914 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
10915 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
10916 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
10917 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
10918 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
/* vaddlv_{s32,u32}: across-vector long add of a 2-element vector.
   Implemented with SADDLP/UADDLP (pairwise long add) -- for exactly two
   lanes, the single pairwise sum IS the across-vector sum.  */
10920 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10921 vaddlv_s32 (int32x2_t a)
10923 int64_t result;
10924 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10925 return result;
10928 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10929 vaddlv_u32 (uint32x2_t a)
10931 uint64_t result;
10932 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
10933 return result;
10936 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10937 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
10939 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
10942 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10943 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
10945 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
10948 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10949 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
10951 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
10954 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10955 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
10957 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
10960 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10961 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
10963 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
10966 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10967 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
10969 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
10972 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10973 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
10975 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
10978 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10979 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
10981 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
10984 /* Table intrinsics. */
10986 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10987 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
10989 poly8x8_t result;
10990 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
10991 : "=w"(result)
10992 : "w"(a), "w"(b)
10993 : /* No clobbers */);
10994 return result;
10997 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10998 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
11000 int8x8_t result;
11001 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11002 : "=w"(result)
11003 : "w"(a), "w"(b)
11004 : /* No clobbers */);
11005 return result;
11008 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11009 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
11011 uint8x8_t result;
11012 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11013 : "=w"(result)
11014 : "w"(a), "w"(b)
11015 : /* No clobbers */);
11016 return result;
11019 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11020 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
11022 poly8x16_t result;
11023 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
11024 : "=w"(result)
11025 : "w"(a), "w"(b)
11026 : /* No clobbers */);
11027 return result;
11030 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11031 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
11033 int8x16_t result;
11034 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
11035 : "=w"(result)
11036 : "w"(a), "w"(b)
11037 : /* No clobbers */);
11038 return result;
11041 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11042 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
11044 uint8x16_t result;
11045 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
11046 : "=w"(result)
11047 : "w"(a), "w"(b)
11048 : /* No clobbers */);
11049 return result;
11052 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11053 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
11055 int8x8_t result;
11056 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11057 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11058 :"=w"(result)
11059 :"Q"(tab),"w"(idx)
11060 :"memory", "v16", "v17");
11061 return result;
11064 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11065 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
11067 uint8x8_t result;
11068 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11069 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11070 :"=w"(result)
11071 :"Q"(tab),"w"(idx)
11072 :"memory", "v16", "v17");
11073 return result;
11076 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11077 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
11079 poly8x8_t result;
11080 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11081 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11082 :"=w"(result)
11083 :"Q"(tab),"w"(idx)
11084 :"memory", "v16", "v17");
11085 return result;
11088 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11089 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
11091 int8x16_t result;
11092 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11093 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11094 :"=w"(result)
11095 :"Q"(tab),"w"(idx)
11096 :"memory", "v16", "v17");
11097 return result;
11100 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11101 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
11103 uint8x16_t result;
11104 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11105 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11106 :"=w"(result)
11107 :"Q"(tab),"w"(idx)
11108 :"memory", "v16", "v17");
11109 return result;
11112 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11113 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
11115 poly8x16_t result;
11116 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11117 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11118 :"=w"(result)
11119 :"Q"(tab),"w"(idx)
11120 :"memory", "v16", "v17");
11121 return result;
11124 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11125 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
11127 int8x8_t result;
11128 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11129 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11130 :"=w"(result)
11131 :"Q"(tab),"w"(idx)
11132 :"memory", "v16", "v17", "v18");
11133 return result;
11136 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11137 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
11139 uint8x8_t result;
11140 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11141 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11142 :"=w"(result)
11143 :"Q"(tab),"w"(idx)
11144 :"memory", "v16", "v17", "v18");
11145 return result;
11148 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11149 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
11151 poly8x8_t result;
11152 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11153 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11154 :"=w"(result)
11155 :"Q"(tab),"w"(idx)
11156 :"memory", "v16", "v17", "v18");
11157 return result;
11160 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11161 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
11163 int8x16_t result;
11164 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11165 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11166 :"=w"(result)
11167 :"Q"(tab),"w"(idx)
11168 :"memory", "v16", "v17", "v18");
11169 return result;
11172 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11173 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
11175 uint8x16_t result;
11176 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11177 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11178 :"=w"(result)
11179 :"Q"(tab),"w"(idx)
11180 :"memory", "v16", "v17", "v18");
11181 return result;
11184 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11185 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
11187 poly8x16_t result;
11188 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11189 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11190 :"=w"(result)
11191 :"Q"(tab),"w"(idx)
11192 :"memory", "v16", "v17", "v18");
11193 return result;
11196 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11197 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
11199 int8x8_t result;
11200 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11201 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11202 :"=w"(result)
11203 :"Q"(tab),"w"(idx)
11204 :"memory", "v16", "v17", "v18", "v19");
11205 return result;
11208 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11209 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
11211 uint8x8_t result;
11212 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11213 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11214 :"=w"(result)
11215 :"Q"(tab),"w"(idx)
11216 :"memory", "v16", "v17", "v18", "v19");
11217 return result;
11220 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11221 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
11223 poly8x8_t result;
11224 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11225 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11226 :"=w"(result)
11227 :"Q"(tab),"w"(idx)
11228 :"memory", "v16", "v17", "v18", "v19");
11229 return result;
11233 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11234 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
11236 int8x16_t result;
11237 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11238 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11239 :"=w"(result)
11240 :"Q"(tab),"w"(idx)
11241 :"memory", "v16", "v17", "v18", "v19");
11242 return result;
11245 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11246 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
11248 uint8x16_t result;
11249 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11250 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11251 :"=w"(result)
11252 :"Q"(tab),"w"(idx)
11253 :"memory", "v16", "v17", "v18", "v19");
11254 return result;
11257 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11258 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
11260 poly8x16_t result;
11261 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11262 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11263 :"=w"(result)
11264 :"Q"(tab),"w"(idx)
11265 :"memory", "v16", "v17", "v18", "v19");
11266 return result;
11270 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11271 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
11273 int8x8_t result = r;
11274 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
11275 : "+w"(result)
11276 : "w"(tab), "w"(idx)
11277 : /* No clobbers */);
11278 return result;
11281 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11282 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
11284 uint8x8_t result = r;
11285 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
11286 : "+w"(result)
11287 : "w"(tab), "w"(idx)
11288 : /* No clobbers */);
11289 return result;
11292 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11293 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
11295 poly8x8_t result = r;
11296 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
11297 : "+w"(result)
11298 : "w"(tab), "w"(idx)
11299 : /* No clobbers */);
11300 return result;
11303 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11304 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
11306 int8x16_t result = r;
11307 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
11308 : "+w"(result)
11309 : "w"(tab), "w"(idx)
11310 : /* No clobbers */);
11311 return result;
11314 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11315 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
11317 uint8x16_t result = r;
11318 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
11319 : "+w"(result)
11320 : "w"(tab), "w"(idx)
11321 : /* No clobbers */);
11322 return result;
11325 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11326 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
11328 poly8x16_t result = r;
11329 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
11330 : "+w"(result)
11331 : "w"(tab), "w"(idx)
11332 : /* No clobbers */);
11333 return result;
11336 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11337 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
11339 int8x8_t result = r;
11340 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11341 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11342 :"+w"(result)
11343 :"Q"(tab),"w"(idx)
11344 :"memory", "v16", "v17");
11345 return result;
11348 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11349 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
11351 uint8x8_t result = r;
11352 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11353 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11354 :"+w"(result)
11355 :"Q"(tab),"w"(idx)
11356 :"memory", "v16", "v17");
11357 return result;
11360 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11361 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
11363 poly8x8_t result = r;
11364 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11365 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
11366 :"+w"(result)
11367 :"Q"(tab),"w"(idx)
11368 :"memory", "v16", "v17");
11369 return result;
11373 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11374 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
11376 int8x16_t result = r;
11377 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11378 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11379 :"+w"(result)
11380 :"Q"(tab),"w"(idx)
11381 :"memory", "v16", "v17");
11382 return result;
11385 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11386 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
11388 uint8x16_t result = r;
11389 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11390 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11391 :"+w"(result)
11392 :"Q"(tab),"w"(idx)
11393 :"memory", "v16", "v17");
11394 return result;
11397 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11398 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
11400 poly8x16_t result = r;
11401 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
11402 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
11403 :"+w"(result)
11404 :"Q"(tab),"w"(idx)
11405 :"memory", "v16", "v17");
11406 return result;
11410 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11411 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
11413 int8x8_t result = r;
11414 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11415 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11416 :"+w"(result)
11417 :"Q"(tab),"w"(idx)
11418 :"memory", "v16", "v17", "v18");
11419 return result;
11422 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11423 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
11425 uint8x8_t result = r;
11426 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11427 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11428 :"+w"(result)
11429 :"Q"(tab),"w"(idx)
11430 :"memory", "v16", "v17", "v18");
11431 return result;
11434 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11435 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
11437 poly8x8_t result = r;
11438 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11439 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
11440 :"+w"(result)
11441 :"Q"(tab),"w"(idx)
11442 :"memory", "v16", "v17", "v18");
11443 return result;
11447 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11448 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
11450 int8x16_t result = r;
11451 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11452 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11453 :"+w"(result)
11454 :"Q"(tab),"w"(idx)
11455 :"memory", "v16", "v17", "v18");
11456 return result;
11459 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11460 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
11462 uint8x16_t result = r;
11463 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11464 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11465 :"+w"(result)
11466 :"Q"(tab),"w"(idx)
11467 :"memory", "v16", "v17", "v18");
11468 return result;
11471 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11472 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
11474 poly8x16_t result = r;
11475 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
11476 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
11477 :"+w"(result)
11478 :"Q"(tab),"w"(idx)
11479 :"memory", "v16", "v17", "v18");
11480 return result;
11484 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11485 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
11487 int8x8_t result = r;
11488 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11489 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11490 :"+w"(result)
11491 :"Q"(tab),"w"(idx)
11492 :"memory", "v16", "v17", "v18", "v19");
11493 return result;
11496 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11497 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
11499 uint8x8_t result = r;
11500 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11501 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11502 :"+w"(result)
11503 :"Q"(tab),"w"(idx)
11504 :"memory", "v16", "v17", "v18", "v19");
11505 return result;
11508 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11509 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
11511 poly8x8_t result = r;
11512 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11513 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
11514 :"+w"(result)
11515 :"Q"(tab),"w"(idx)
11516 :"memory", "v16", "v17", "v18", "v19");
11517 return result;
11521 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11522 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
11524 int8x16_t result = r;
11525 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11526 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11527 :"+w"(result)
11528 :"Q"(tab),"w"(idx)
11529 :"memory", "v16", "v17", "v18", "v19");
11530 return result;
11533 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11534 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
11536 uint8x16_t result = r;
11537 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11538 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11539 :"+w"(result)
11540 :"Q"(tab),"w"(idx)
11541 :"memory", "v16", "v17", "v18", "v19");
11542 return result;
11545 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11546 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
11548 poly8x16_t result = r;
11549 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
11550 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
11551 :"+w"(result)
11552 :"Q"(tab),"w"(idx)
11553 :"memory", "v16", "v17", "v18", "v19");
11554 return result;
11557 /* V7 legacy table intrinsics. */
11559 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11560 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
11562 int8x8_t result;
11563 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11564 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11565 : "=w"(result)
11566 : "w"(temp), "w"(idx)
11567 : /* No clobbers */);
11568 return result;
11571 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11572 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
11574 uint8x8_t result;
11575 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11576 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11577 : "=w"(result)
11578 : "w"(temp), "w"(idx)
11579 : /* No clobbers */);
11580 return result;
11583 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11584 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
11586 poly8x8_t result;
11587 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
11588 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11589 : "=w"(result)
11590 : "w"(temp), "w"(idx)
11591 : /* No clobbers */);
11592 return result;
11595 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11596 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
11598 int8x8_t result;
11599 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
11600 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11601 : "=w"(result)
11602 : "w"(temp), "w"(idx)
11603 : /* No clobbers */);
11604 return result;
11607 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11608 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
11610 uint8x8_t result;
11611 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
11612 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11613 : "=w"(result)
11614 : "w"(temp), "w"(idx)
11615 : /* No clobbers */);
11616 return result;
11619 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11620 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
11622 poly8x8_t result;
11623 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
11624 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
11625 : "=w"(result)
11626 : "w"(temp), "w"(idx)
11627 : /* No clobbers */);
11628 return result;
11631 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11632 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
11634 int8x8_t result;
11635 int8x16x2_t temp;
11636 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
11637 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11638 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11639 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11640 : "=w"(result)
11641 : "Q"(temp), "w"(idx)
11642 : "v16", "v17", "memory");
11643 return result;
11646 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11647 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
11649 uint8x8_t result;
11650 uint8x16x2_t temp;
11651 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
11652 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11653 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11654 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11655 : "=w"(result)
11656 : "Q"(temp), "w"(idx)
11657 : "v16", "v17", "memory");
11658 return result;
11661 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11662 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
11664 poly8x8_t result;
11665 poly8x16x2_t temp;
11666 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
11667 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
11668 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11669 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11670 : "=w"(result)
11671 : "Q"(temp), "w"(idx)
11672 : "v16", "v17", "memory");
11673 return result;
11676 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11677 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
11679 int8x8_t result;
11680 int8x16x2_t temp;
11681 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
11682 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
11683 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11684 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11685 : "=w"(result)
11686 : "Q"(temp), "w"(idx)
11687 : "v16", "v17", "memory");
11688 return result;
11691 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11692 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
11694 uint8x8_t result;
11695 uint8x16x2_t temp;
11696 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
11697 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
11698 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11699 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11700 : "=w"(result)
11701 : "Q"(temp), "w"(idx)
11702 : "v16", "v17", "memory");
11703 return result;
11706 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11707 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
11709 poly8x8_t result;
11710 poly8x16x2_t temp;
11711 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
11712 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
11713 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11714 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11715 : "=w"(result)
11716 : "Q"(temp), "w"(idx)
11717 : "v16", "v17", "memory");
11718 return result;
11721 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11722 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
11724 int8x8_t result = r;
11725 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
11726 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
11727 : "+w"(result)
11728 : "w"(temp), "w"(idx)
11729 : /* No clobbers */);
11730 return result;
11733 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11734 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
11736 uint8x8_t result = r;
11737 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
11738 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
11739 : "+w"(result)
11740 : "w"(temp), "w"(idx)
11741 : /* No clobbers */);
11742 return result;
11745 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11746 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
11748 poly8x8_t result = r;
11749 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
11750 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
11751 : "+w"(result)
11752 : "w"(temp), "w"(idx)
11753 : /* No clobbers */);
11754 return result;
11757 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11758 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
11760 int8x8_t result = r;
11761 int8x16x2_t temp;
11762 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
11763 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
11764 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11765 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11766 : "+w"(result)
11767 : "Q"(temp), "w"(idx)
11768 : "v16", "v17", "memory");
11769 return result;
11772 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11773 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
11775 uint8x8_t result = r;
11776 uint8x16x2_t temp;
11777 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
11778 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
11779 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11780 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11781 : "+w"(result)
11782 : "Q"(temp), "w"(idx)
11783 : "v16", "v17", "memory");
11784 return result;
11787 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11788 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
11790 poly8x8_t result = r;
11791 poly8x16x2_t temp;
11792 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
11793 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
11794 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
11795 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
11796 : "+w"(result)
11797 : "Q"(temp), "w"(idx)
11798 : "v16", "v17", "memory");
11799 return result;
11802 /* End of temporary inline asm. */
11804 /* Start of optimal implementations in approved order. */
11806 /* vabs */
11808 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11809 vabs_f32 (float32x2_t __a)
11811 return __builtin_aarch64_absv2sf (__a);
11814 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
11815 vabs_f64 (float64x1_t __a)
11817 return (float64x1_t) {__builtin_fabs (__a[0])};
11820 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11821 vabs_s8 (int8x8_t __a)
11823 return __builtin_aarch64_absv8qi (__a);
11826 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11827 vabs_s16 (int16x4_t __a)
11829 return __builtin_aarch64_absv4hi (__a);
11832 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11833 vabs_s32 (int32x2_t __a)
11835 return __builtin_aarch64_absv2si (__a);
11838 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11839 vabs_s64 (int64x1_t __a)
11841 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
11844 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11845 vabsq_f32 (float32x4_t __a)
11847 return __builtin_aarch64_absv4sf (__a);
11850 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11851 vabsq_f64 (float64x2_t __a)
11853 return __builtin_aarch64_absv2df (__a);
11856 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11857 vabsq_s8 (int8x16_t __a)
11859 return __builtin_aarch64_absv16qi (__a);
11862 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11863 vabsq_s16 (int16x8_t __a)
11865 return __builtin_aarch64_absv8hi (__a);
11868 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11869 vabsq_s32 (int32x4_t __a)
11871 return __builtin_aarch64_absv4si (__a);
11874 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11875 vabsq_s64 (int64x2_t __a)
11877 return __builtin_aarch64_absv2di (__a);
11880 /* vadd */
11882 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
11883 vaddd_s64 (int64_t __a, int64_t __b)
11885 return __a + __b;
11888 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11889 vaddd_u64 (uint64_t __a, uint64_t __b)
11891 return __a + __b;
11894 /* vaddv */
11896 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
11897 vaddv_s8 (int8x8_t __a)
11899 return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
11902 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11903 vaddv_s16 (int16x4_t __a)
11905 return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
11908 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11909 vaddv_s32 (int32x2_t __a)
11911 return __builtin_aarch64_reduc_plus_scal_v2si (__a);
11914 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11915 vaddv_u8 (uint8x8_t __a)
11917 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
11920 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11921 vaddv_u16 (uint16x4_t __a)
11923 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
11926 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11927 vaddv_u32 (uint32x2_t __a)
11929 return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
11932 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
11933 vaddvq_s8 (int8x16_t __a)
11935 return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
11938 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
11939 vaddvq_s16 (int16x8_t __a)
11941 return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
11944 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
11945 vaddvq_s32 (int32x4_t __a)
11947 return __builtin_aarch64_reduc_plus_scal_v4si (__a);
11950 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
11951 vaddvq_s64 (int64x2_t __a)
11953 return __builtin_aarch64_reduc_plus_scal_v2di (__a);
11956 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
11957 vaddvq_u8 (uint8x16_t __a)
11959 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
11962 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
11963 vaddvq_u16 (uint16x8_t __a)
11965 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
11968 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11969 vaddvq_u32 (uint32x4_t __a)
11971 return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
11974 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11975 vaddvq_u64 (uint64x2_t __a)
11977 return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
11980 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11981 vaddv_f32 (float32x2_t __a)
11983 return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
11986 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11987 vaddvq_f32 (float32x4_t __a)
11989 return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
11992 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11993 vaddvq_f64 (float64x2_t __a)
11995 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
11998 /* vbsl */
12000 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12001 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
12003 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
12006 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12007 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
12009 return (float64x1_t)
12010 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
12013 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12014 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
12016 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
12019 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12020 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
12022 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
12025 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12026 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
12028 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
12031 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12032 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
12034 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
12037 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12038 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
12040 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
12043 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12044 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
12046 return (int64x1_t)
12047 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
12050 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12051 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
12053 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
12056 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12057 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
12059 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
12062 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12063 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
12065 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
12068 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12069 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
12071 return (uint64x1_t)
12072 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
12075 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12076 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
12078 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
12081 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12082 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
12084 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
12087 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12088 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
12090 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
12093 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12094 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
12096 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
12099 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12100 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
12102 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
12105 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12106 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
12108 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
12111 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12112 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
12114 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
12117 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12118 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
12120 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
12123 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12124 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
12126 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
12129 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12130 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
12132 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
12135 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12136 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
12138 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
12141 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12142 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
12144 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* vaes - single-round AES intrinsics (encrypt, decrypt, and the
   corresponding mix-columns steps).  Only available with the crypto
   extension.  */

static __inline uint8x16_t
vaeseq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesdq_u8 (uint8x16_t data, uint8x16_t key)
{
  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
}

static __inline uint8x16_t
vaesmcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
}

static __inline uint8x16_t
vaesimcq_u8 (uint8x16_t data)
{
  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
}
#endif
12177 /* vcage */
12179 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12180 vcage_f64 (float64x1_t __a, float64x1_t __b)
12182 return vabs_f64 (__a) >= vabs_f64 (__b);
12185 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12186 vcages_f32 (float32_t __a, float32_t __b)
12188 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
12191 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12192 vcage_f32 (float32x2_t __a, float32x2_t __b)
12194 return vabs_f32 (__a) >= vabs_f32 (__b);
12197 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12198 vcageq_f32 (float32x4_t __a, float32x4_t __b)
12200 return vabsq_f32 (__a) >= vabsq_f32 (__b);
12203 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12204 vcaged_f64 (float64_t __a, float64_t __b)
12206 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
12209 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12210 vcageq_f64 (float64x2_t __a, float64x2_t __b)
12212 return vabsq_f64 (__a) >= vabsq_f64 (__b);
12215 /* vcagt */
12217 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12218 vcagts_f32 (float32_t __a, float32_t __b)
12220 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
12223 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12224 vcagt_f32 (float32x2_t __a, float32x2_t __b)
12226 return vabs_f32 (__a) > vabs_f32 (__b);
12229 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12230 vcagt_f64 (float64x1_t __a, float64x1_t __b)
12232 return vabs_f64 (__a) > vabs_f64 (__b);
12235 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12236 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
12238 return vabsq_f32 (__a) > vabsq_f32 (__b);
12241 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12242 vcagtd_f64 (float64_t __a, float64_t __b)
12244 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
12247 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12248 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
12250 return vabsq_f64 (__a) > vabsq_f64 (__b);
12253 /* vcale */
12255 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12256 vcale_f32 (float32x2_t __a, float32x2_t __b)
12258 return vabs_f32 (__a) <= vabs_f32 (__b);
12261 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12262 vcale_f64 (float64x1_t __a, float64x1_t __b)
12264 return vabs_f64 (__a) <= vabs_f64 (__b);
12267 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12268 vcaled_f64 (float64_t __a, float64_t __b)
12270 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
12273 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12274 vcales_f32 (float32_t __a, float32_t __b)
12276 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
12279 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12280 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
12282 return vabsq_f32 (__a) <= vabsq_f32 (__b);
12285 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12286 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
12288 return vabsq_f64 (__a) <= vabsq_f64 (__b);
12291 /* vcalt */
12293 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12294 vcalt_f32 (float32x2_t __a, float32x2_t __b)
12296 return vabs_f32 (__a) < vabs_f32 (__b);
12299 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12300 vcalt_f64 (float64x1_t __a, float64x1_t __b)
12302 return vabs_f64 (__a) < vabs_f64 (__b);
12305 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12306 vcaltd_f64 (float64_t __a, float64_t __b)
12308 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
12311 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12312 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
12314 return vabsq_f32 (__a) < vabsq_f32 (__b);
12317 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12318 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
12320 return vabsq_f64 (__a) < vabsq_f64 (__b);
12323 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12324 vcalts_f32 (float32_t __a, float32_t __b)
12326 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
12329 /* vceq - vector. */
12331 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12332 vceq_f32 (float32x2_t __a, float32x2_t __b)
12334 return (uint32x2_t) (__a == __b);
12337 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12338 vceq_f64 (float64x1_t __a, float64x1_t __b)
12340 return (uint64x1_t) (__a == __b);
12343 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12344 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
12346 return (uint8x8_t) (__a == __b);
12349 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12350 vceq_s8 (int8x8_t __a, int8x8_t __b)
12352 return (uint8x8_t) (__a == __b);
12355 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12356 vceq_s16 (int16x4_t __a, int16x4_t __b)
12358 return (uint16x4_t) (__a == __b);
12361 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12362 vceq_s32 (int32x2_t __a, int32x2_t __b)
12364 return (uint32x2_t) (__a == __b);
12367 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12368 vceq_s64 (int64x1_t __a, int64x1_t __b)
12370 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
12373 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12374 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
12376 return (__a == __b);
12379 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12380 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
12382 return (__a == __b);
12385 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12386 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
12388 return (__a == __b);
12391 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12392 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
12394 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};
12397 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12398 vceqq_f32 (float32x4_t __a, float32x4_t __b)
12400 return (uint32x4_t) (__a == __b);
12403 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12404 vceqq_f64 (float64x2_t __a, float64x2_t __b)
12406 return (uint64x2_t) (__a == __b);
12409 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12410 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
12412 return (uint8x16_t) (__a == __b);
12415 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12416 vceqq_s8 (int8x16_t __a, int8x16_t __b)
12418 return (uint8x16_t) (__a == __b);
12421 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12422 vceqq_s16 (int16x8_t __a, int16x8_t __b)
12424 return (uint16x8_t) (__a == __b);
12427 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12428 vceqq_s32 (int32x4_t __a, int32x4_t __b)
12430 return (uint32x4_t) (__a == __b);
12433 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12434 vceqq_s64 (int64x2_t __a, int64x2_t __b)
12436 return (uint64x2_t) (__a == __b);
12439 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12440 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
12442 return (__a == __b);
12445 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12446 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
12448 return (__a == __b);
12451 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12452 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
12454 return (__a == __b);
12457 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12458 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
12460 return (__a == __b);
12463 /* vceq - scalar. */
12465 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12466 vceqs_f32 (float32_t __a, float32_t __b)
12468 return __a == __b ? -1 : 0;
/* Scalar 64-bit equality compares: all-ones when equal, zero otherwise.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_s64 (int64_t __a, int64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64_t __a, uint64_t __b)
{
  return __a == __b ? -1ll : 0ll;
}
12483 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12484 vceqd_f64 (float64_t __a, float64_t __b)
12486 return __a == __b ? -1ll : 0ll;
12489 /* vceqz - vector. */
12491 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12492 vceqz_f32 (float32x2_t __a)
12494 return (uint32x2_t) (__a == 0.0f);
12497 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12498 vceqz_f64 (float64x1_t __a)
12500 return (uint64x1_t) (__a == (float64x1_t) {0.0});
12503 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12504 vceqz_p8 (poly8x8_t __a)
12506 return (uint8x8_t) (__a == 0);
12509 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12510 vceqz_s8 (int8x8_t __a)
12512 return (uint8x8_t) (__a == 0);
12515 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12516 vceqz_s16 (int16x4_t __a)
12518 return (uint16x4_t) (__a == 0);
12521 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12522 vceqz_s32 (int32x2_t __a)
12524 return (uint32x2_t) (__a == 0);
12527 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12528 vceqz_s64 (int64x1_t __a)
12530 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
12533 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12534 vceqz_u8 (uint8x8_t __a)
12536 return (__a == 0);
12539 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12540 vceqz_u16 (uint16x4_t __a)
12542 return (__a == 0);
12545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12546 vceqz_u32 (uint32x2_t __a)
12548 return (__a == 0);
12551 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12552 vceqz_u64 (uint64x1_t __a)
12554 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};
12557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12558 vceqzq_f32 (float32x4_t __a)
12560 return (uint32x4_t) (__a == 0.0f);
12563 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12564 vceqzq_f64 (float64x2_t __a)
12566 return (uint64x2_t) (__a == 0.0f);
12569 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12570 vceqzq_p8 (poly8x16_t __a)
12572 return (uint8x16_t) (__a == 0);
12575 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12576 vceqzq_s8 (int8x16_t __a)
12578 return (uint8x16_t) (__a == 0);
12581 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12582 vceqzq_s16 (int16x8_t __a)
12584 return (uint16x8_t) (__a == 0);
12587 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12588 vceqzq_s32 (int32x4_t __a)
12590 return (uint32x4_t) (__a == 0);
12593 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12594 vceqzq_s64 (int64x2_t __a)
12596 return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
12599 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12600 vceqzq_u8 (uint8x16_t __a)
12602 return (__a == 0);
12605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12606 vceqzq_u16 (uint16x8_t __a)
12608 return (__a == 0);
12611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12612 vceqzq_u32 (uint32x4_t __a)
12614 return (__a == 0);
12617 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12618 vceqzq_u64 (uint64x2_t __a)
12620 return (__a == __AARCH64_UINT64_C (0));
12623 /* vceqz - scalar. */
12625 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12626 vceqzs_f32 (float32_t __a)
12628 return __a == 0.0f ? -1 : 0;
/* Scalar 64-bit compare-against-zero: all-ones when zero, else zero.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_u64 (uint64_t __a)
{
  return __a == 0 ? -1ll : 0ll;
}
12643 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12644 vceqzd_f64 (float64_t __a)
12646 return __a == 0.0 ? -1ll : 0ll;
12649 /* vcge - vector. */
12651 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12652 vcge_f32 (float32x2_t __a, float32x2_t __b)
12654 return (uint32x2_t) (__a >= __b);
12657 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12658 vcge_f64 (float64x1_t __a, float64x1_t __b)
12660 return (uint64x1_t) (__a >= __b);
12663 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12664 vcge_s8 (int8x8_t __a, int8x8_t __b)
12666 return (uint8x8_t) (__a >= __b);
12669 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12670 vcge_s16 (int16x4_t __a, int16x4_t __b)
12672 return (uint16x4_t) (__a >= __b);
12675 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12676 vcge_s32 (int32x2_t __a, int32x2_t __b)
12678 return (uint32x2_t) (__a >= __b);
12681 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12682 vcge_s64 (int64x1_t __a, int64x1_t __b)
12684 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
12687 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12688 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
12690 return (__a >= __b);
12693 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12694 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
12696 return (__a >= __b);
12699 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12700 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
12702 return (__a >= __b);
12705 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12706 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
12708 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
12711 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12712 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
12714 return (uint32x4_t) (__a >= __b);
12717 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12718 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
12720 return (uint64x2_t) (__a >= __b);
12723 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12724 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
12726 return (uint8x16_t) (__a >= __b);
12729 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12730 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
12732 return (uint16x8_t) (__a >= __b);
12735 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12736 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
12738 return (uint32x4_t) (__a >= __b);
12741 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12742 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
12744 return (uint64x2_t) (__a >= __b);
12747 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12748 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
12750 return (__a >= __b);
12753 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12754 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
12756 return (__a >= __b);
12759 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12760 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
12762 return (__a >= __b);
12765 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12766 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
12768 return (__a >= __b);
12771 /* vcge - scalar. */
12773 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12774 vcges_f32 (float32_t __a, float32_t __b)
12776 return __a >= __b ? -1 : 0;
/* Scalar 64-bit >= compares: all-ones when __a >= __b, else zero.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_s64 (int64_t __a, int64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}
12791 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12792 vcged_f64 (float64_t __a, float64_t __b)
12794 return __a >= __b ? -1ll : 0ll;
12797 /* vcgez - vector. */
12799 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12800 vcgez_f32 (float32x2_t __a)
12802 return (uint32x2_t) (__a >= 0.0f);
12805 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12806 vcgez_f64 (float64x1_t __a)
12808 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
12811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12812 vcgez_s8 (int8x8_t __a)
12814 return (uint8x8_t) (__a >= 0);
12817 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12818 vcgez_s16 (int16x4_t __a)
12820 return (uint16x4_t) (__a >= 0);
12823 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12824 vcgez_s32 (int32x2_t __a)
12826 return (uint32x2_t) (__a >= 0);
12829 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12830 vcgez_s64 (int64x1_t __a)
12832 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
12835 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12836 vcgezq_f32 (float32x4_t __a)
12838 return (uint32x4_t) (__a >= 0.0f);
12841 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12842 vcgezq_f64 (float64x2_t __a)
12844 return (uint64x2_t) (__a >= 0.0);
12847 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12848 vcgezq_s8 (int8x16_t __a)
12850 return (uint8x16_t) (__a >= 0);
12853 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12854 vcgezq_s16 (int16x8_t __a)
12856 return (uint16x8_t) (__a >= 0);
12859 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12860 vcgezq_s32 (int32x4_t __a)
12862 return (uint32x4_t) (__a >= 0);
12865 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12866 vcgezq_s64 (int64x2_t __a)
12868 return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
12871 /* vcgez - scalar. */
12873 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12874 vcgezs_f32 (float32_t __a)
12876 return __a >= 0.0f ? -1 : 0;
/* Scalar 64-bit >= 0 compare: all-ones when non-negative, else zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64_t __a)
{
  return __a >= 0 ? -1ll : 0ll;
}
12885 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12886 vcgezd_f64 (float64_t __a)
12888 return __a >= 0.0 ? -1ll : 0ll;
12891 /* vcgt - vector. */
12893 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12894 vcgt_f32 (float32x2_t __a, float32x2_t __b)
12896 return (uint32x2_t) (__a > __b);
12899 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12900 vcgt_f64 (float64x1_t __a, float64x1_t __b)
12902 return (uint64x1_t) (__a > __b);
12905 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12906 vcgt_s8 (int8x8_t __a, int8x8_t __b)
12908 return (uint8x8_t) (__a > __b);
12911 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12912 vcgt_s16 (int16x4_t __a, int16x4_t __b)
12914 return (uint16x4_t) (__a > __b);
12917 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12918 vcgt_s32 (int32x2_t __a, int32x2_t __b)
12920 return (uint32x2_t) (__a > __b);
12923 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12924 vcgt_s64 (int64x1_t __a, int64x1_t __b)
12926 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
12929 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12930 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
12932 return (__a > __b);
12935 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12936 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
12938 return (__a > __b);
12941 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12942 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
12944 return (__a > __b);
12947 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12948 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
12950 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
12953 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12954 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
12956 return (uint32x4_t) (__a > __b);
12959 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12960 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
12962 return (uint64x2_t) (__a > __b);
12965 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12966 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
12968 return (uint8x16_t) (__a > __b);
12971 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12972 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
12974 return (uint16x8_t) (__a > __b);
12977 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12978 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
12980 return (uint32x4_t) (__a > __b);
12983 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12984 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
12986 return (uint64x2_t) (__a > __b);
12989 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12990 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
12992 return (__a > __b);
12995 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12996 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
12998 return (__a > __b);
13001 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13002 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
13004 return (__a > __b);
13007 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13008 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
13010 return (__a > __b);
13013 /* vcgt - scalar. */
13015 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13016 vcgts_f32 (float32_t __a, float32_t __b)
13018 return __a > __b ? -1 : 0;
/* Scalar 64-bit > compares: all-ones when __a > __b, else zero.  */

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64_t __a, int64_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64_t __a, uint64_t __b)
{
  return __a > __b ? -1ll : 0ll;
}
13033 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13034 vcgtd_f64 (float64_t __a, float64_t __b)
13036 return __a > __b ? -1ll : 0ll;
13039 /* vcgtz - vector. */
13041 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13042 vcgtz_f32 (float32x2_t __a)
13044 return (uint32x2_t) (__a > 0.0f);
13047 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13048 vcgtz_f64 (float64x1_t __a)
13050 return (uint64x1_t) (__a > (float64x1_t) {0.0});
13053 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13054 vcgtz_s8 (int8x8_t __a)
13056 return (uint8x8_t) (__a > 0);
13059 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13060 vcgtz_s16 (int16x4_t __a)
13062 return (uint16x4_t) (__a > 0);
13065 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13066 vcgtz_s32 (int32x2_t __a)
13068 return (uint32x2_t) (__a > 0);
13071 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13072 vcgtz_s64 (int64x1_t __a)
13074 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
13077 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13078 vcgtzq_f32 (float32x4_t __a)
13080 return (uint32x4_t) (__a > 0.0f);
13083 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13084 vcgtzq_f64 (float64x2_t __a)
13086 return (uint64x2_t) (__a > 0.0);
13089 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13090 vcgtzq_s8 (int8x16_t __a)
13092 return (uint8x16_t) (__a > 0);
13095 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13096 vcgtzq_s16 (int16x8_t __a)
13098 return (uint16x8_t) (__a > 0);
13101 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13102 vcgtzq_s32 (int32x4_t __a)
13104 return (uint32x4_t) (__a > 0);
13107 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13108 vcgtzq_s64 (int64x2_t __a)
13110 return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
13113 /* vcgtz - scalar. */
13115 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13116 vcgtzs_f32 (float32_t __a)
13118 return __a > 0.0f ? -1 : 0;
/* Scalar 64-bit > 0 compare: all-ones when positive, else zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64_t __a)
{
  return __a > 0 ? -1ll : 0ll;
}
13127 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13128 vcgtzd_f64 (float64_t __a)
13130 return __a > 0.0 ? -1ll : 0ll;
/* vcle - vector.  Lane-wise less-than-or-equal comparison; each result
   lane is all-ones when __a[i] <= __b[i], else all-zeros.  Unsigned
   variants need no cast because the comparison result converts to the
   unsigned result type directly.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a <= __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a <= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a <= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  /* Single-lane 64-bit vector: compare the scalar lanes and rebuild the
     result with a brace initializer.  */
  return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a <= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a <= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a <= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a <= __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a <= __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a <= __b);
}
/* vcle - scalar.  Return all-ones when __a <= __b, else zero.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcles_f32 (float32_t __a, float32_t __b)
{
  return __a <= __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_f64 (float64_t __a, float64_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}
/* vclez - vector.  Lane-wise compare against zero: each result lane is
   all-ones when the input lane is <= 0, else all-zeros (CMLE #0
   semantics).  Only signed and floating-point variants exist.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_f64 (float64x1_t __a)
{
  /* float64x1_t does not mix with a scalar 0.0 here; compare against a
     one-element vector literal instead.  */
  return (uint64x1_t) (__a <= (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a <= 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a <= 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a <= 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_s64 (int64x1_t __a)
{
  /* Single-lane 64-bit vector: compare the scalar lane directly.  */
  return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a <= 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a <= 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a <= 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a <= 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a <= 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  /* Explicit 64-bit literal so the comparison constant matches the
     element type.  */
  return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
}
/* vclez - scalar.  Return all-ones when the operand is <= 0, else zero.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclezs_f32 (float32_t __a)
{
  return __a <= 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  return __a <= 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_f64 (float64_t __a)
{
  return __a <= 0.0 ? -1ll : 0ll;
}
/* vclt - vector.  Lane-wise less-than comparison; each result lane is
   all-ones when __a[i] < __b[i], else all-zeros.  Unsigned variants need
   no cast because the comparison result converts to the unsigned result
   type directly.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return (uint64x1_t) (__a < __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  /* Single-lane 64-bit vector: compare the scalar lanes and rebuild the
     result with a brace initializer.  */
  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) (__a < __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (__a < __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (__a < __b);
}
/* vclt - scalar.  Return all-ones when __a < __b, else zero.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vclts_f32 (float32_t __a, float32_t __b)
{
  return __a < __b ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_f64 (float64_t __a, float64_t __b)
{
  return __a < __b ? -1ll : 0ll;
}
/* vcltz - vector.  Lane-wise compare against zero: each result lane is
   all-ones when the input lane is < 0, else all-zeros (CMLT #0
   semantics).  Only signed and floating-point variants exist.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_f32 (float32x2_t __a)
{
  return (uint32x2_t) (__a < 0.0f);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_f64 (float64x1_t __a)
{
  /* float64x1_t does not mix with a scalar 0.0 here; compare against a
     one-element vector literal instead.  */
  return (uint64x1_t) (__a < (float64x1_t) {0.0});
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcltz_s8 (int8x8_t __a)
{
  return (uint8x8_t) (__a < 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcltz_s16 (int16x4_t __a)
{
  return (uint16x4_t) (__a < 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcltz_s32 (int32x2_t __a)
{
  return (uint32x2_t) (__a < 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltz_s64 (int64x1_t __a)
{
  /* Single-lane 64-bit vector: compare the scalar lane directly.  */
  return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_f32 (float32x4_t __a)
{
  return (uint32x4_t) (__a < 0.0f);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_f64 (float64x2_t __a)
{
  return (uint64x2_t) (__a < 0.0);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltzq_s8 (int8x16_t __a)
{
  return (uint8x16_t) (__a < 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltzq_s16 (int16x8_t __a)
{
  return (uint16x8_t) (__a < 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltzq_s32 (int32x4_t __a)
{
  return (uint32x4_t) (__a < 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltzq_s64 (int64x2_t __a)
{
  /* Explicit 64-bit literal so the comparison constant matches the
     element type.  */
  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
}
/* vcltz - scalar.  Return all-ones when the operand is < 0, else zero.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcltzs_f32 (float32_t __a)
{
  return __a < 0.0f ? -1 : 0;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64_t __a)
{
  return __a < 0 ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltzd_f64 (float64_t __a)
{
  return __a < 0.0 ? -1ll : 0ll;
}
/* vcls.  Count leading sign bits per lane, via GCC's "count leading
   redundant sign bits" (clrsb) builtins.  Signed element types only.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clrsbv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clrsbv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clrsbv2si (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clrsbv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clrsbv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clrsbv4si (__a);
}
/* vclz.  Count leading zero bits per lane.  The unsigned variants reuse
   the signed-typed builtin with casts, since the bit pattern (and hence
   the leading-zero count) is identical.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t __a)
{
  return __builtin_aarch64_clzv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t __a)
{
  return __builtin_aarch64_clzv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t __a)
{
  return __builtin_aarch64_clzv2si (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t __a)
{
  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t __a)
{
  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t __a)
{
  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_clzv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_clzv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_clzv4si (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t __a)
{
  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t __a)
{
  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t __a)
{
  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
}
/* vcnt.  Population count (number of set bits) per 8-bit lane.  Poly and
   unsigned variants reuse the signed-typed popcount builtin via casts,
   since popcount depends only on the bit pattern.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t __a)
{
  return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t __a)
{
  return __builtin_aarch64_popcountv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_popcountv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
}
/* vcvt (double -> float).  Narrow each double lane to float; the "high"
   form writes the narrowed lanes into the upper half of a 128-bit
   result, keeping __a in the lower half.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t __a)
{
  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
}

/* vcvt (float -> double).  Widen float lanes to double; the "high" form
   converts the upper two lanes of a 128-bit input.  */

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a)
{

  return __builtin_aarch64_float_extend_lo_v2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a)
{
  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
}
/* vcvt (<u>int -> float).  Integer to floating-point conversion.  Scalar
   forms use plain C casts; vector forms use the float/floatuns builtins
   (unsigned inputs are cast to the signed vector type the builtin
   expects -- the bits are reinterpreted, not converted).  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_s64 (int64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vcvtd_f64_u64 (uint64_t __a)
{
  return (float64_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_s32 (int32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_f32_u32 (uint32_t __a)
{
  return (float32_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t __a)
{
  return __builtin_aarch64_floatv2siv2sf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t __a)
{
  return __builtin_aarch64_floatv4siv4sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t __a)
{
  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t __a)
{
  return __builtin_aarch64_floatv2div2df (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t __a)
{
  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
}
/* vcvt (float -> <u>int).  Floating-point to integer conversion with
   truncation toward zero.  Scalar forms use plain C casts; vector forms
   use the lbtrunc builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtd_s64_f64 (float64_t __a)
{
  return (int64_t) __a;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtd_u64_f64 (float64_t __a)
{
  return (uint64_t) __a;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_s32_f32 (float32_t __a)
{
  return (int32_t) __a;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_u32_f32 (float32_t __a)
{
  return (uint32_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lbtruncv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lbtruncv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lbtruncv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
}
/* vcvta.  Float to integer conversion rounding to nearest with ties away
   from zero, via the lround family of builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtad_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lrounddfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtad_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lroundudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtas_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtas_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lroundusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lroundv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lroundv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lroundv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
}
/* vcvtm.  Float to integer conversion rounding toward minus infinity
   (floor).  Signed scalar forms use the generic llfloor/ifloorf
   builtins; unsigned and vector forms use the aarch64 lfloor family.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtmd_s64_f64 (float64_t __a)
{
  return __builtin_llfloor (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtmd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfloorudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtms_s32_f32 (float32_t __a)
{
  return __builtin_ifloorf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtms_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfloorusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfloorv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfloorv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfloorv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
}
/* vcvtn.  Float to integer conversion rounding to nearest with ties to
   even, via the lfrintn family of builtins.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtnd_s64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintndfdi (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtnd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lfrintnudfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtns_s32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnsfsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtns_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lfrintnusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lfrintnv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lfrintnv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lfrintnv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
}
/* vcvtp.  Float to integer conversion rounding toward plus infinity
   (ceiling).  Signed scalar forms use the generic llceil/iceilf
   builtins; unsigned and vector forms use the aarch64 lceil family.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vcvtpd_s64_f64 (float64_t __a)
{
  return __builtin_llceil (__a);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcvtpd_u64_f64 (float64_t __a)
{
  return __builtin_aarch64_lceiludfdi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvtps_s32_f32 (float32_t __a)
{
  return __builtin_iceilf (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvtps_u32_f32 (float32_t __a)
{
  return __builtin_aarch64_lceilusfsi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t __a)
{
  return __builtin_aarch64_lceilv2sfv2si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t __a)
{
  return __builtin_aarch64_lceilv4sfv4si (__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t __a)
{
  return __builtin_aarch64_lceilv2dfv2di (__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t __a)
{
  /* TODO: This cast should go away when builtins have
     their correct types.  */
  return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
}
/* vdup_n.  Broadcast a scalar into every lane of a 64-bit vector, built
   with plain vector-literal initializers.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t __a)
{
  return (float32x2_t) {__a, __a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdup_n_f64 (float64_t __a)
{
  return (float64x1_t) {__a};
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (poly8_t __a)
{
  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (poly16_t __a)
{
  return (poly16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int8_t __a)
{
  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int16_t __a)
{
  return (int16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t __a)
{
  return (int32x2_t) {__a, __a};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint8_t __a)
{
  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint16_t __a)
{
  return (uint16x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t __a)
{
  return (uint32x2_t) {__a, __a};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}
/* vdupq_n.  Broadcast a scalar into every lane of a 128-bit vector.
   NOTE(review): unlike the vdup_n forms above, the narrow q-form
   variants here declare widened parameter types (int32_t/uint32_t for
   8/16-bit elements) rather than the element types; the value is
   truncated lane-wise by the initializer.  Presumably kept for
   historical compatibility -- confirm before changing.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdupq_n_f32 (float32_t __a)
{
  return (float32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdupq_n_f64 (float64_t __a)
{
  return (float64x2_t) {__a, __a};
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vdupq_n_p8 (uint32_t __a)
{
  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vdupq_n_p16 (uint32_t __a)
{
  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vdupq_n_s8 (int32_t __a)
{
  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		      __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vdupq_n_s16 (int32_t __a)
{
  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vdupq_n_s32 (int32_t __a)
{
  return (int32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vdupq_n_s64 (int64_t __a)
{
  return (int64x2_t) {__a, __a};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vdupq_n_u8 (uint32_t __a)
{
  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
		       __a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vdupq_n_u16 (uint32_t __a)
{
  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vdupq_n_u32 (uint32_t __a)
{
  return (uint32x4_t) {__a, __a, __a, __a};
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vdupq_n_u64 (uint64_t __a)
{
  return (uint64x2_t) {__a, __a};
}
14349 /* vdup_lane */
/* vdup_lane_<t>: broadcast lane __b of the 64-bit vector __a into every
   lane of a 64-bit result.  Each thin wrapper forwards to the
   __aarch64_vdup_lane_* helper macro; presumably those macros also
   bounds-check __b — they are defined elsewhere in this header.  */
14351 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14352 vdup_lane_f32 (float32x2_t __a, const int __b)
14354 return __aarch64_vdup_lane_f32 (__a, __b);
14357 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14358 vdup_lane_f64 (float64x1_t __a, const int __b)
14360 return __aarch64_vdup_lane_f64 (__a, __b);
14363 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14364 vdup_lane_p8 (poly8x8_t __a, const int __b)
14366 return __aarch64_vdup_lane_p8 (__a, __b);
14369 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14370 vdup_lane_p16 (poly16x4_t __a, const int __b)
14372 return __aarch64_vdup_lane_p16 (__a, __b);
14375 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14376 vdup_lane_s8 (int8x8_t __a, const int __b)
14378 return __aarch64_vdup_lane_s8 (__a, __b);
14381 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14382 vdup_lane_s16 (int16x4_t __a, const int __b)
14384 return __aarch64_vdup_lane_s16 (__a, __b);
14387 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14388 vdup_lane_s32 (int32x2_t __a, const int __b)
14390 return __aarch64_vdup_lane_s32 (__a, __b);
14393 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14394 vdup_lane_s64 (int64x1_t __a, const int __b)
14396 return __aarch64_vdup_lane_s64 (__a, __b);
14399 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14400 vdup_lane_u8 (uint8x8_t __a, const int __b)
14402 return __aarch64_vdup_lane_u8 (__a, __b);
14405 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14406 vdup_lane_u16 (uint16x4_t __a, const int __b)
14408 return __aarch64_vdup_lane_u16 (__a, __b);
14411 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14412 vdup_lane_u32 (uint32x2_t __a, const int __b)
14414 return __aarch64_vdup_lane_u32 (__a, __b);
14417 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14418 vdup_lane_u64 (uint64x1_t __a, const int __b)
14420 return __aarch64_vdup_lane_u64 (__a, __b);
14423 /* vdup_laneq */
/* vdup_laneq_<t>: broadcast lane __b of the 128-bit vector __a into
   every lane of a 64-bit result, via the __aarch64_vdup_laneq_* helper
   macros (defined elsewhere in this header).  */
14425 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14426 vdup_laneq_f32 (float32x4_t __a, const int __b)
14428 return __aarch64_vdup_laneq_f32 (__a, __b);
14431 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14432 vdup_laneq_f64 (float64x2_t __a, const int __b)
14434 return __aarch64_vdup_laneq_f64 (__a, __b);
14437 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14438 vdup_laneq_p8 (poly8x16_t __a, const int __b)
14440 return __aarch64_vdup_laneq_p8 (__a, __b);
14443 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14444 vdup_laneq_p16 (poly16x8_t __a, const int __b)
14446 return __aarch64_vdup_laneq_p16 (__a, __b);
14449 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14450 vdup_laneq_s8 (int8x16_t __a, const int __b)
14452 return __aarch64_vdup_laneq_s8 (__a, __b);
14455 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14456 vdup_laneq_s16 (int16x8_t __a, const int __b)
14458 return __aarch64_vdup_laneq_s16 (__a, __b);
14461 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14462 vdup_laneq_s32 (int32x4_t __a, const int __b)
14464 return __aarch64_vdup_laneq_s32 (__a, __b);
14467 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14468 vdup_laneq_s64 (int64x2_t __a, const int __b)
14470 return __aarch64_vdup_laneq_s64 (__a, __b);
14473 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14474 vdup_laneq_u8 (uint8x16_t __a, const int __b)
14476 return __aarch64_vdup_laneq_u8 (__a, __b);
14479 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14480 vdup_laneq_u16 (uint16x8_t __a, const int __b)
14482 return __aarch64_vdup_laneq_u16 (__a, __b);
14485 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14486 vdup_laneq_u32 (uint32x4_t __a, const int __b)
14488 return __aarch64_vdup_laneq_u32 (__a, __b);
14491 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14492 vdup_laneq_u64 (uint64x2_t __a, const int __b)
14494 return __aarch64_vdup_laneq_u64 (__a, __b);
14497 /* vdupq_lane */
/* vdupq_lane_<t>: broadcast lane __b of the 64-bit vector __a into
   every lane of a 128-bit result, via the __aarch64_vdupq_lane_* helper
   macros (defined elsewhere in this header).  */
14498 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14499 vdupq_lane_f32 (float32x2_t __a, const int __b)
14501 return __aarch64_vdupq_lane_f32 (__a, __b);
14504 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14505 vdupq_lane_f64 (float64x1_t __a, const int __b)
14507 return __aarch64_vdupq_lane_f64 (__a, __b);
14510 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14511 vdupq_lane_p8 (poly8x8_t __a, const int __b)
14513 return __aarch64_vdupq_lane_p8 (__a, __b);
14516 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14517 vdupq_lane_p16 (poly16x4_t __a, const int __b)
14519 return __aarch64_vdupq_lane_p16 (__a, __b);
14522 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14523 vdupq_lane_s8 (int8x8_t __a, const int __b)
14525 return __aarch64_vdupq_lane_s8 (__a, __b);
14528 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14529 vdupq_lane_s16 (int16x4_t __a, const int __b)
14531 return __aarch64_vdupq_lane_s16 (__a, __b);
14534 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14535 vdupq_lane_s32 (int32x2_t __a, const int __b)
14537 return __aarch64_vdupq_lane_s32 (__a, __b);
14540 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14541 vdupq_lane_s64 (int64x1_t __a, const int __b)
14543 return __aarch64_vdupq_lane_s64 (__a, __b);
14546 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14547 vdupq_lane_u8 (uint8x8_t __a, const int __b)
14549 return __aarch64_vdupq_lane_u8 (__a, __b);
14552 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14553 vdupq_lane_u16 (uint16x4_t __a, const int __b)
14555 return __aarch64_vdupq_lane_u16 (__a, __b);
14558 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14559 vdupq_lane_u32 (uint32x2_t __a, const int __b)
14561 return __aarch64_vdupq_lane_u32 (__a, __b);
14564 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14565 vdupq_lane_u64 (uint64x1_t __a, const int __b)
14567 return __aarch64_vdupq_lane_u64 (__a, __b);
14570 /* vdupq_laneq */
/* vdupq_laneq_<t>: broadcast lane __b of the 128-bit vector __a into
   every lane of a 128-bit result, via the __aarch64_vdupq_laneq_*
   helper macros (defined elsewhere in this header).  */
14571 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14572 vdupq_laneq_f32 (float32x4_t __a, const int __b)
14574 return __aarch64_vdupq_laneq_f32 (__a, __b);
14577 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14578 vdupq_laneq_f64 (float64x2_t __a, const int __b)
14580 return __aarch64_vdupq_laneq_f64 (__a, __b);
14583 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14584 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
14586 return __aarch64_vdupq_laneq_p8 (__a, __b);
14589 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14590 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
14592 return __aarch64_vdupq_laneq_p16 (__a, __b);
14595 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14596 vdupq_laneq_s8 (int8x16_t __a, const int __b)
14598 return __aarch64_vdupq_laneq_s8 (__a, __b);
14601 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14602 vdupq_laneq_s16 (int16x8_t __a, const int __b)
14604 return __aarch64_vdupq_laneq_s16 (__a, __b);
14607 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14608 vdupq_laneq_s32 (int32x4_t __a, const int __b)
14610 return __aarch64_vdupq_laneq_s32 (__a, __b);
14613 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14614 vdupq_laneq_s64 (int64x2_t __a, const int __b)
14616 return __aarch64_vdupq_laneq_s64 (__a, __b);
14619 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14620 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
14622 return __aarch64_vdupq_laneq_u8 (__a, __b);
14625 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14626 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
14628 return __aarch64_vdupq_laneq_u16 (__a, __b);
14631 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14632 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
14634 return __aarch64_vdupq_laneq_u32 (__a, __b);
14637 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14638 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
14640 return __aarch64_vdupq_laneq_u64 (__a, __b);
/* vdup<b|h|s|d>_lane: extract lane __b of a 64-bit vector as a scalar
   (b = 8-bit, h = 16-bit, s = 32-bit, d = 64-bit lanes).  The b/h/s
   forms go through __aarch64_vget_lane_any, which presumably also
   validates __b — its definition is elsewhere in this header.  */
14643 /* vdupb_lane */
14644 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
14645 vdupb_lane_p8 (poly8x8_t __a, const int __b)
14647 return __aarch64_vget_lane_any (__a, __b);
14650 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
14651 vdupb_lane_s8 (int8x8_t __a, const int __b)
14653 return __aarch64_vget_lane_any (__a, __b);
14656 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14657 vdupb_lane_u8 (uint8x8_t __a, const int __b)
14659 return __aarch64_vget_lane_any (__a, __b);
14662 /* vduph_lane */
14663 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14664 vduph_lane_p16 (poly16x4_t __a, const int __b)
14666 return __aarch64_vget_lane_any (__a, __b);
14669 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14670 vduph_lane_s16 (int16x4_t __a, const int __b)
14672 return __aarch64_vget_lane_any (__a, __b);
14675 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14676 vduph_lane_u16 (uint16x4_t __a, const int __b)
14678 return __aarch64_vget_lane_any (__a, __b);
14681 /* vdups_lane */
14682 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14683 vdups_lane_f32 (float32x2_t __a, const int __b)
14685 return __aarch64_vget_lane_any (__a, __b);
14688 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14689 vdups_lane_s32 (int32x2_t __a, const int __b)
14691 return __aarch64_vget_lane_any (__a, __b);
14694 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14695 vdups_lane_u32 (uint32x2_t __a, const int __b)
14697 return __aarch64_vget_lane_any (__a, __b);
14700 /* vdupd_lane */
/* The x1 vectors have exactly one lane, so after validating __b the
   only possible result is element 0.  */
14701 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14702 vdupd_lane_f64 (float64x1_t __a, const int __b)
14704 __AARCH64_LANE_CHECK (__a, __b);
14705 return __a[0];
14708 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14709 vdupd_lane_s64 (int64x1_t __a, const int __b)
14711 __AARCH64_LANE_CHECK (__a, __b);
14712 return __a[0];
14715 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14716 vdupd_lane_u64 (uint64x1_t __a, const int __b)
14718 __AARCH64_LANE_CHECK (__a, __b);
14719 return __a[0];
14722 /* vdupb_laneq */
14723 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
14724 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
14726 return __aarch64_vget_lane_any (__a, __b);
14729 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
14730 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
14732 return __aarch64_vget_lane_any (__a, __b);
14735 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
14736 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
14738 return __aarch64_vget_lane_any (__a, __b);
/* vdup<h|s|d>_laneq: extract lane __b of a 128-bit vector as a scalar
   (h = 16-bit, s = 32-bit, d = 64-bit lanes), via
   __aarch64_vget_lane_any (defined elsewhere in this header).  */
14741 /* vduph_laneq */
14742 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
14743 vduph_laneq_p16 (poly16x8_t __a, const int __b)
14745 return __aarch64_vget_lane_any (__a, __b);
14748 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
14749 vduph_laneq_s16 (int16x8_t __a, const int __b)
14751 return __aarch64_vget_lane_any (__a, __b);
14754 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
14755 vduph_laneq_u16 (uint16x8_t __a, const int __b)
14757 return __aarch64_vget_lane_any (__a, __b);
14760 /* vdups_laneq */
14761 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14762 vdups_laneq_f32 (float32x4_t __a, const int __b)
14764 return __aarch64_vget_lane_any (__a, __b);
14767 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
14768 vdups_laneq_s32 (int32x4_t __a, const int __b)
14770 return __aarch64_vget_lane_any (__a, __b);
14773 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14774 vdups_laneq_u32 (uint32x4_t __a, const int __b)
14776 return __aarch64_vget_lane_any (__a, __b);
14779 /* vdupd_laneq */
14780 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14781 vdupd_laneq_f64 (float64x2_t __a, const int __b)
14783 return __aarch64_vget_lane_any (__a, __b);
14786 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
14787 vdupd_laneq_s64 (int64x2_t __a, const int __b)
14789 return __aarch64_vget_lane_any (__a, __b);
14792 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14793 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
14795 return __aarch64_vget_lane_any (__a, __b);
14798 /* vext */
/* vext_<t>: extract a 64-bit vector from the pair (__a, __b), starting
   at lane __c of __a — i.e. lanes {__c, __c+1, ...} of the logical
   concatenation a:b.  The little-endian path expresses this directly as
   a __builtin_shuffle; the __AARCH64EB__ path swaps the operands and
   renumbers the selector lanes to account for big-endian lane layout.
   __c is validated by __AARCH64_LANE_CHECK.  (`__const` is the GCC
   keyword spelling of `const`.)  */
14800 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14801 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
14803 __AARCH64_LANE_CHECK (__a, __c);
14804 #ifdef __AARCH64EB__
14805 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14806 #else
14807 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14808 #endif
14811 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14812 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
14814 __AARCH64_LANE_CHECK (__a, __c);
14815 /* The only possible index to the assembler instruction returns element 0. */
14816 return __a;
14818 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14819 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
14821 __AARCH64_LANE_CHECK (__a, __c);
14822 #ifdef __AARCH64EB__
14823 return __builtin_shuffle (__b, __a, (uint8x8_t)
14824 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14825 #else
14826 return __builtin_shuffle (__a, __b,
14827 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14828 #endif
14831 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14832 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
14834 __AARCH64_LANE_CHECK (__a, __c);
14835 #ifdef __AARCH64EB__
14836 return __builtin_shuffle (__b, __a,
14837 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14838 #else
14839 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14840 #endif
14843 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14844 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
14846 __AARCH64_LANE_CHECK (__a, __c);
14847 #ifdef __AARCH64EB__
14848 return __builtin_shuffle (__b, __a, (uint8x8_t)
14849 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14850 #else
14851 return __builtin_shuffle (__a, __b,
14852 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14853 #endif
14856 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14857 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
14859 __AARCH64_LANE_CHECK (__a, __c);
14860 #ifdef __AARCH64EB__
14861 return __builtin_shuffle (__b, __a,
14862 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14863 #else
14864 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14865 #endif
14868 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14869 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
14871 __AARCH64_LANE_CHECK (__a, __c);
14872 #ifdef __AARCH64EB__
14873 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14874 #else
14875 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14876 #endif
14879 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14880 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
14882 __AARCH64_LANE_CHECK (__a, __c);
14883 /* The only possible index to the assembler instruction returns element 0. */
14884 return __a;
14887 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14888 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
14890 __AARCH64_LANE_CHECK (__a, __c);
14891 #ifdef __AARCH64EB__
14892 return __builtin_shuffle (__b, __a, (uint8x8_t)
14893 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14894 #else
14895 return __builtin_shuffle (__a, __b,
14896 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14897 #endif
14900 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14901 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
14903 __AARCH64_LANE_CHECK (__a, __c);
14904 #ifdef __AARCH64EB__
14905 return __builtin_shuffle (__b, __a,
14906 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14907 #else
14908 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14909 #endif
14912 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14913 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
14915 __AARCH64_LANE_CHECK (__a, __c);
14916 #ifdef __AARCH64EB__
14917 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14918 #else
14919 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14920 #endif
14923 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14924 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
14926 __AARCH64_LANE_CHECK (__a, __c);
14927 /* The only possible index to the assembler instruction returns element 0. */
14928 return __a;
/* vextq_<t>: 128-bit variant of vext — extract a vector from the pair
   (__a, __b) starting at lane __c of __a.  Same EB/LE selector scheme
   as the 64-bit forms above; __c is validated by __AARCH64_LANE_CHECK.  */
14931 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14932 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
14934 __AARCH64_LANE_CHECK (__a, __c);
14935 #ifdef __AARCH64EB__
14936 return __builtin_shuffle (__b, __a,
14937 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14938 #else
14939 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14940 #endif
14943 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14944 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
14946 __AARCH64_LANE_CHECK (__a, __c);
14947 #ifdef __AARCH64EB__
14948 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14949 #else
14950 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14951 #endif
14954 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14955 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
14957 __AARCH64_LANE_CHECK (__a, __c);
14958 #ifdef __AARCH64EB__
14959 return __builtin_shuffle (__b, __a, (uint8x16_t)
14960 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14961 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14962 #else
14963 return __builtin_shuffle (__a, __b, (uint8x16_t)
14964 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14965 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14966 #endif
14969 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14970 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
14972 __AARCH64_LANE_CHECK (__a, __c);
14973 #ifdef __AARCH64EB__
14974 return __builtin_shuffle (__b, __a, (uint16x8_t)
14975 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14976 #else
14977 return __builtin_shuffle (__a, __b,
14978 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14979 #endif
14982 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14983 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
14985 __AARCH64_LANE_CHECK (__a, __c);
14986 #ifdef __AARCH64EB__
14987 return __builtin_shuffle (__b, __a, (uint8x16_t)
14988 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14989 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14990 #else
14991 return __builtin_shuffle (__a, __b, (uint8x16_t)
14992 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14993 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14994 #endif
14997 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14998 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
15000 __AARCH64_LANE_CHECK (__a, __c);
15001 #ifdef __AARCH64EB__
15002 return __builtin_shuffle (__b, __a, (uint16x8_t)
15003 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15004 #else
15005 return __builtin_shuffle (__a, __b,
15006 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15007 #endif
15010 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15011 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
15013 __AARCH64_LANE_CHECK (__a, __c);
15014 #ifdef __AARCH64EB__
15015 return __builtin_shuffle (__b, __a,
15016 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15017 #else
15018 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
15019 #endif
15022 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15023 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
15025 __AARCH64_LANE_CHECK (__a, __c);
15026 #ifdef __AARCH64EB__
15027 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
15028 #else
15029 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
15030 #endif
15033 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15034 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
15036 __AARCH64_LANE_CHECK (__a, __c);
15037 #ifdef __AARCH64EB__
15038 return __builtin_shuffle (__b, __a, (uint8x16_t)
15039 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
15040 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
15041 #else
15042 return __builtin_shuffle (__a, __b, (uint8x16_t)
15043 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
15044 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
15045 #endif
15048 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15049 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
15051 __AARCH64_LANE_CHECK (__a, __c);
15052 #ifdef __AARCH64EB__
15053 return __builtin_shuffle (__b, __a, (uint16x8_t)
15054 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
15055 #else
15056 return __builtin_shuffle (__a, __b,
15057 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
15058 #endif
15061 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15062 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
15064 __AARCH64_LANE_CHECK (__a, __c);
15065 #ifdef __AARCH64EB__
15066 return __builtin_shuffle (__b, __a,
15067 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
15068 #else
15069 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
15070 #endif
15073 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15074 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
15076 __AARCH64_LANE_CHECK (__a, __c);
15077 #ifdef __AARCH64EB__
15078 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
15079 #else
15080 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
15081 #endif
15084 /* vfma */
/* vfma: fused multiply-accumulate, computing __a + __b * __c with a
   single rounding (__builtin_fma / the aarch64 fma builtins).  The _n
   variants broadcast the scalar __c across all lanes first.  */
15086 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15087 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
15089 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
15092 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15093 vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
15095 return __builtin_aarch64_fmav2sf (__b, __c, __a);
15098 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15099 vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
15101 return __builtin_aarch64_fmav4sf (__b, __c, __a);
15104 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15105 vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
15107 return __builtin_aarch64_fmav2df (__b, __c, __a);
15110 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15111 vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
15113 return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
15116 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15117 vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
15119 return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a);
15122 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15123 vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
15125 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
15128 /* vfma_lane */
/* vfma[q]_lane[q] / vfma<s|d>_lane[q]: fused multiply-accumulate where
   the multiplier is lane __lane of vector __c, broadcast (or extracted)
   via the vdup/vget lane helpers.  Result is __a + __b * __c[__lane].  */
15130 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15131 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
15132 float32x2_t __c, const int __lane)
15134 return __builtin_aarch64_fmav2sf (__b,
15135 __aarch64_vdup_lane_f32 (__c, __lane),
15136 __a);
/* NOTE(review): float64x1 has only lane 0, so __lane is ignored here
   and no lane-bounds check is performed — confirm this is intended.  */
15139 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15140 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
15141 float64x1_t __c, const int __lane)
15143 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
15146 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15147 vfmad_lane_f64 (float64_t __a, float64_t __b,
15148 float64x1_t __c, const int __lane)
15150 return __builtin_fma (__b, __c[0], __a);
15153 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15154 vfmas_lane_f32 (float32_t __a, float32_t __b,
15155 float32x2_t __c, const int __lane)
15157 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15160 /* vfma_laneq */
15162 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15163 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
15164 float32x4_t __c, const int __lane)
15166 return __builtin_aarch64_fmav2sf (__b,
15167 __aarch64_vdup_laneq_f32 (__c, __lane),
15168 __a);
15171 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15172 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
15173 float64x2_t __c, const int __lane)
15175 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
15176 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
15179 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15180 vfmad_laneq_f64 (float64_t __a, float64_t __b,
15181 float64x2_t __c, const int __lane)
15183 return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15186 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15187 vfmas_laneq_f32 (float32_t __a, float32_t __b,
15188 float32x4_t __c, const int __lane)
15190 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
15193 /* vfmaq_lane */
15195 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15196 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
15197 float32x2_t __c, const int __lane)
15199 return __builtin_aarch64_fmav4sf (__b,
15200 __aarch64_vdupq_lane_f32 (__c, __lane),
15201 __a);
/* NOTE(review): lane 0 is the only valid index for float64x1 __c, so
   __lane is ignored and __c[0] is broadcast — confirm intended.  */
15204 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15205 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
15206 float64x1_t __c, const int __lane)
15208 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
15211 /* vfmaq_laneq */
15213 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15214 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
15215 float32x4_t __c, const int __lane)
15217 return __builtin_aarch64_fmav4sf (__b,
15218 __aarch64_vdupq_laneq_f32 (__c, __lane),
15219 __a);
15222 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15223 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
15224 float64x2_t __c, const int __lane)
15226 return __builtin_aarch64_fmav2df (__b,
15227 __aarch64_vdupq_laneq_f64 (__c, __lane),
15228 __a);
15231 /* vfms */
15233 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15234 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
15236 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
15239 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15240 vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
15242 return __builtin_aarch64_fmav2sf (-__b, __c, __a);
15245 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15246 vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
15248 return __builtin_aarch64_fmav4sf (-__b, __c, __a);
15251 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15252 vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
15254 return __builtin_aarch64_fmav2df (-__b, __c, __a);
/* vfms_lane */

/* Fused multiply-subtract by one lane of a 64-bit vector:
   result = __a - __b * __c[__lane].  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
               float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
                                    __aarch64_vdup_lane_f32 (__c, __lane),
                                    __a);
}

/* float64x1_t has exactly one lane, so __lane (which must be 0)
   selects nothing and is intentionally unused here.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
               float64x1_t __c, const int __lane)
{
  return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
}

/* Scalar variant: __a - __b * __c[0] (the only lane of __c; __lane is
   intentionally unused for the same reason as above).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_lane_f64 (float64_t __a, float64_t __b,
                float64x1_t __c, const int __lane)
{
  return __builtin_fma (-__b, __c[0], __a);
}

/* Scalar variant: __a - __b * __c[__lane].  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_lane_f32 (float32_t __a, float32_t __b,
                float32x2_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}
/* vfms_laneq */

/* Fused multiply-subtract by one lane of a 128-bit vector:
   result = __a - __b * __c[__lane].  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
                float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2sf (-__b,
                                    __aarch64_vdup_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
                float64x2_t __c, const int __lane)
{
  /* Extract the selected lane first, then use the scalar fma builtin.  */
  float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
  return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
}

/* Scalar variants: __a - __b * __c[__lane].  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vfmsd_laneq_f64 (float64_t __a, float64_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vfmss_laneq_f32 (float32_t __a, float32_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
}
/* vfmsq_lane */

/* 128-bit fused multiply-subtract by one lane of a 64-bit vector:
   each result lane is __a[i] - __b[i] * __c[__lane].  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
                float32x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
                                    __aarch64_vdupq_lane_f32 (__c, __lane),
                                    __a);
}

/* __c has only one lane, so __lane must be 0; the value is broadcast
   across the 128-bit vector with vdupq_n_f64.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
                float64x1_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
}
/* vfmsq_laneq */

/* 128-bit fused multiply-subtract by one lane of a 128-bit vector:
   each result lane is __a[i] - __b[i] * __c[__lane].  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
                 float32x4_t __c, const int __lane)
{
  return __builtin_aarch64_fmav4sf (-__b,
                                    __aarch64_vdupq_laneq_f32 (__c, __lane),
                                    __a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
                 float64x2_t __c, const int __lane)
{
  return __builtin_aarch64_fmav2df (-__b,
                                    __aarch64_vdupq_laneq_f64 (__c, __lane),
                                    __a);
}
15361 /* vld1 */
15363 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15364 vld1_f32 (const float32_t *a)
15366 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
15369 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15370 vld1_f64 (const float64_t *a)
15372 return (float64x1_t) {*a};
15375 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15376 vld1_p8 (const poly8_t *a)
15378 return (poly8x8_t)
15379 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15382 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15383 vld1_p16 (const poly16_t *a)
15385 return (poly16x4_t)
15386 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15389 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15390 vld1_s8 (const int8_t *a)
15392 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15395 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15396 vld1_s16 (const int16_t *a)
15398 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15401 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15402 vld1_s32 (const int32_t *a)
15404 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
15407 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15408 vld1_s64 (const int64_t *a)
15410 return (int64x1_t) {*a};
15413 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15414 vld1_u8 (const uint8_t *a)
15416 return (uint8x8_t)
15417 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
15420 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15421 vld1_u16 (const uint16_t *a)
15423 return (uint16x4_t)
15424 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
15427 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15428 vld1_u32 (const uint32_t *a)
15430 return (uint32x2_t)
15431 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
15434 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15435 vld1_u64 (const uint64_t *a)
15437 return (uint64x1_t) {*a};
15440 /* vld1q */
15442 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15443 vld1q_f32 (const float32_t *a)
15445 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
15448 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15449 vld1q_f64 (const float64_t *a)
15451 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
15454 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15455 vld1q_p8 (const poly8_t *a)
15457 return (poly8x16_t)
15458 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15461 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15462 vld1q_p16 (const poly16_t *a)
15464 return (poly16x8_t)
15465 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15468 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15469 vld1q_s8 (const int8_t *a)
15471 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15474 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15475 vld1q_s16 (const int16_t *a)
15477 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15480 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15481 vld1q_s32 (const int32_t *a)
15483 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
15486 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15487 vld1q_s64 (const int64_t *a)
15489 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
15492 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15493 vld1q_u8 (const uint8_t *a)
15495 return (uint8x16_t)
15496 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
15499 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15500 vld1q_u16 (const uint16_t *a)
15502 return (uint16x8_t)
15503 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
15506 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15507 vld1q_u32 (const uint32_t *a)
15509 return (uint32x4_t)
15510 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
15513 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15514 vld1q_u64 (const uint64_t *a)
15516 return (uint64x2_t)
15517 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
/* vld1_dup */

/* Load one scalar from memory and replicate it across every lane of a
   64-bit vector; each wrapper delegates to the matching vdup_n_*
   intrinsic defined earlier in this file.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t* __a)
{
  return vdup_n_f32 (*__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t* __a)
{
  return vdup_n_f64 (*__a);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t* __a)
{
  return vdup_n_p8 (*__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t* __a)
{
  return vdup_n_p16 (*__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t* __a)
{
  return vdup_n_s8 (*__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t* __a)
{
  return vdup_n_s16 (*__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t* __a)
{
  return vdup_n_s32 (*__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t* __a)
{
  return vdup_n_s64 (*__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t* __a)
{
  return vdup_n_u8 (*__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t* __a)
{
  return vdup_n_u16 (*__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t* __a)
{
  return vdup_n_u32 (*__a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t* __a)
{
  return vdup_n_u64 (*__a);
}
/* vld1q_dup */

/* Load one scalar from memory and replicate it across every lane of a
   128-bit vector; each wrapper delegates to the matching vdupq_n_*
   intrinsic defined earlier in this file.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t* __a)
{
  return vdupq_n_f32 (*__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t* __a)
{
  return vdupq_n_f64 (*__a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t* __a)
{
  return vdupq_n_p8 (*__a);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t* __a)
{
  return vdupq_n_p16 (*__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t* __a)
{
  return vdupq_n_s8 (*__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t* __a)
{
  return vdupq_n_s16 (*__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t* __a)
{
  return vdupq_n_s32 (*__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t* __a)
{
  return vdupq_n_s64 (*__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t* __a)
{
  return vdupq_n_u8 (*__a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t* __a)
{
  return vdupq_n_u16 (*__a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t* __a)
{
  return vdupq_n_u32 (*__a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t* __a)
{
  return vdupq_n_u64 (*__a);
}
/* vld1_lane */

/* Load one scalar from __src into lane __lane of the 64-bit vector
   __vec, leaving all other lanes unchanged.  All variants go through
   the generic __aarch64_vset_lane_any helper macro defined earlier in
   this file, which also validates that __lane is in bounds.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
/* vld1q_lane */

/* Load one scalar from __src into lane __lane of the 128-bit vector
   __vec, leaving all other lanes unchanged.  Uses the same generic
   set-lane helper macro as the 64-bit variants.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
{
  return __aarch64_vset_lane_any (*__src, __vec, __lane);
}
15816 /* vldn */
15818 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
15819 vld2_s64 (const int64_t * __a)
15821 int64x1x2_t ret;
15822 __builtin_aarch64_simd_oi __o;
15823 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15824 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15825 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15826 return ret;
15829 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
15830 vld2_u64 (const uint64_t * __a)
15832 uint64x1x2_t ret;
15833 __builtin_aarch64_simd_oi __o;
15834 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15835 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15836 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15837 return ret;
15840 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
15841 vld2_f64 (const float64_t * __a)
15843 float64x1x2_t ret;
15844 __builtin_aarch64_simd_oi __o;
15845 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
15846 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
15847 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
15848 return ret;
15851 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
15852 vld2_s8 (const int8_t * __a)
15854 int8x8x2_t ret;
15855 __builtin_aarch64_simd_oi __o;
15856 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15857 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15858 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15859 return ret;
15862 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
15863 vld2_p8 (const poly8_t * __a)
15865 poly8x8x2_t ret;
15866 __builtin_aarch64_simd_oi __o;
15867 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15868 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15869 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15870 return ret;
15873 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
15874 vld2_s16 (const int16_t * __a)
15876 int16x4x2_t ret;
15877 __builtin_aarch64_simd_oi __o;
15878 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15879 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15880 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15881 return ret;
15884 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
15885 vld2_p16 (const poly16_t * __a)
15887 poly16x4x2_t ret;
15888 __builtin_aarch64_simd_oi __o;
15889 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15890 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15891 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15892 return ret;
15895 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
15896 vld2_s32 (const int32_t * __a)
15898 int32x2x2_t ret;
15899 __builtin_aarch64_simd_oi __o;
15900 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15901 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15902 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15903 return ret;
15906 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
15907 vld2_u8 (const uint8_t * __a)
15909 uint8x8x2_t ret;
15910 __builtin_aarch64_simd_oi __o;
15911 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15912 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15913 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15914 return ret;
15917 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
15918 vld2_u16 (const uint16_t * __a)
15920 uint16x4x2_t ret;
15921 __builtin_aarch64_simd_oi __o;
15922 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15923 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15924 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15925 return ret;
15928 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
15929 vld2_u32 (const uint32_t * __a)
15931 uint32x2x2_t ret;
15932 __builtin_aarch64_simd_oi __o;
15933 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15934 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15935 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15936 return ret;
15939 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
15940 vld2_f32 (const float32_t * __a)
15942 float32x2x2_t ret;
15943 __builtin_aarch64_simd_oi __o;
15944 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
15945 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
15946 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
15947 return ret;
15950 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
15951 vld2q_s8 (const int8_t * __a)
15953 int8x16x2_t ret;
15954 __builtin_aarch64_simd_oi __o;
15955 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15956 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15957 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15958 return ret;
15961 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
15962 vld2q_p8 (const poly8_t * __a)
15964 poly8x16x2_t ret;
15965 __builtin_aarch64_simd_oi __o;
15966 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15967 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15968 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15969 return ret;
15972 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
15973 vld2q_s16 (const int16_t * __a)
15975 int16x8x2_t ret;
15976 __builtin_aarch64_simd_oi __o;
15977 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15978 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15979 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15980 return ret;
15983 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
15984 vld2q_p16 (const poly16_t * __a)
15986 poly16x8x2_t ret;
15987 __builtin_aarch64_simd_oi __o;
15988 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15989 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15990 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15991 return ret;
15994 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
15995 vld2q_s32 (const int32_t * __a)
15997 int32x4x2_t ret;
15998 __builtin_aarch64_simd_oi __o;
15999 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16000 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16001 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16002 return ret;
16005 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16006 vld2q_s64 (const int64_t * __a)
16008 int64x2x2_t ret;
16009 __builtin_aarch64_simd_oi __o;
16010 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16011 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16012 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16013 return ret;
16016 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16017 vld2q_u8 (const uint8_t * __a)
16019 uint8x16x2_t ret;
16020 __builtin_aarch64_simd_oi __o;
16021 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
16022 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16023 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16024 return ret;
16027 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16028 vld2q_u16 (const uint16_t * __a)
16030 uint16x8x2_t ret;
16031 __builtin_aarch64_simd_oi __o;
16032 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
16033 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16034 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16035 return ret;
16038 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16039 vld2q_u32 (const uint32_t * __a)
16041 uint32x4x2_t ret;
16042 __builtin_aarch64_simd_oi __o;
16043 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
16044 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16045 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16046 return ret;
16049 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16050 vld2q_u64 (const uint64_t * __a)
16052 uint64x2x2_t ret;
16053 __builtin_aarch64_simd_oi __o;
16054 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
16055 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16056 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16057 return ret;
16060 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16061 vld2q_f32 (const float32_t * __a)
16063 float32x4x2_t ret;
16064 __builtin_aarch64_simd_oi __o;
16065 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
16066 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16067 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16068 return ret;
16071 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16072 vld2q_f64 (const float64_t * __a)
16074 float64x2x2_t ret;
16075 __builtin_aarch64_simd_oi __o;
16076 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
16077 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16078 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16079 return ret;
16082 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16083 vld3_s64 (const int64_t * __a)
16085 int64x1x3_t ret;
16086 __builtin_aarch64_simd_ci __o;
16087 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16088 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16089 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16090 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16091 return ret;
16094 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16095 vld3_u64 (const uint64_t * __a)
16097 uint64x1x3_t ret;
16098 __builtin_aarch64_simd_ci __o;
16099 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
16100 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16101 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16102 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16103 return ret;
16106 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16107 vld3_f64 (const float64_t * __a)
16109 float64x1x3_t ret;
16110 __builtin_aarch64_simd_ci __o;
16111 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
16112 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16113 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16114 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16115 return ret;
16118 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16119 vld3_s8 (const int8_t * __a)
16121 int8x8x3_t ret;
16122 __builtin_aarch64_simd_ci __o;
16123 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16124 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16125 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16126 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16127 return ret;
16130 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16131 vld3_p8 (const poly8_t * __a)
16133 poly8x8x3_t ret;
16134 __builtin_aarch64_simd_ci __o;
16135 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16136 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16137 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16138 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16139 return ret;
16142 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16143 vld3_s16 (const int16_t * __a)
16145 int16x4x3_t ret;
16146 __builtin_aarch64_simd_ci __o;
16147 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16148 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16149 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16150 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16151 return ret;
16154 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16155 vld3_p16 (const poly16_t * __a)
16157 poly16x4x3_t ret;
16158 __builtin_aarch64_simd_ci __o;
16159 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16160 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16161 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16162 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16163 return ret;
16166 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16167 vld3_s32 (const int32_t * __a)
16169 int32x2x3_t ret;
16170 __builtin_aarch64_simd_ci __o;
16171 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16172 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16173 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16174 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16175 return ret;
16178 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16179 vld3_u8 (const uint8_t * __a)
16181 uint8x8x3_t ret;
16182 __builtin_aarch64_simd_ci __o;
16183 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
16184 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16185 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16186 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16187 return ret;
16190 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16191 vld3_u16 (const uint16_t * __a)
16193 uint16x4x3_t ret;
16194 __builtin_aarch64_simd_ci __o;
16195 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
16196 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16197 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16198 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16199 return ret;
16202 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16203 vld3_u32 (const uint32_t * __a)
16205 uint32x2x3_t ret;
16206 __builtin_aarch64_simd_ci __o;
16207 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
16208 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16209 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16210 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16211 return ret;
16214 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16215 vld3_f32 (const float32_t * __a)
16217 float32x2x3_t ret;
16218 __builtin_aarch64_simd_ci __o;
16219 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
16220 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16221 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16222 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16223 return ret;
16226 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16227 vld3q_s8 (const int8_t * __a)
16229 int8x16x3_t ret;
16230 __builtin_aarch64_simd_ci __o;
16231 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16232 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16233 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16234 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16235 return ret;
16238 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16239 vld3q_p8 (const poly8_t * __a)
16241 poly8x16x3_t ret;
16242 __builtin_aarch64_simd_ci __o;
16243 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16244 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16245 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16246 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16247 return ret;
16250 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16251 vld3q_s16 (const int16_t * __a)
16253 int16x8x3_t ret;
16254 __builtin_aarch64_simd_ci __o;
16255 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16256 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16257 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16258 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16259 return ret;
16262 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16263 vld3q_p16 (const poly16_t * __a)
16265 poly16x8x3_t ret;
16266 __builtin_aarch64_simd_ci __o;
16267 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16268 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16269 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16270 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16271 return ret;
16274 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16275 vld3q_s32 (const int32_t * __a)
16277 int32x4x3_t ret;
16278 __builtin_aarch64_simd_ci __o;
16279 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16280 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16281 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16282 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16283 return ret;
16286 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16287 vld3q_s64 (const int64_t * __a)
16289 int64x2x3_t ret;
16290 __builtin_aarch64_simd_ci __o;
16291 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16292 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16293 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16294 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16295 return ret;
16298 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16299 vld3q_u8 (const uint8_t * __a)
16301 uint8x16x3_t ret;
16302 __builtin_aarch64_simd_ci __o;
16303 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
16304 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16305 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16306 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16307 return ret;
16310 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16311 vld3q_u16 (const uint16_t * __a)
16313 uint16x8x3_t ret;
16314 __builtin_aarch64_simd_ci __o;
16315 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
16316 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16317 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16318 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16319 return ret;
16322 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16323 vld3q_u32 (const uint32_t * __a)
16325 uint32x4x3_t ret;
16326 __builtin_aarch64_simd_ci __o;
16327 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
16328 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16329 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16330 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16331 return ret;
16334 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16335 vld3q_u64 (const uint64_t * __a)
16337 uint64x2x3_t ret;
16338 __builtin_aarch64_simd_ci __o;
16339 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
16340 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16341 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16342 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16343 return ret;
16346 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
16347 vld3q_f32 (const float32_t * __a)
16349 float32x4x3_t ret;
16350 __builtin_aarch64_simd_ci __o;
16351 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
16352 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
16353 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
16354 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
16355 return ret;
16358 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
16359 vld3q_f64 (const float64_t * __a)
16361 float64x2x3_t ret;
16362 __builtin_aarch64_simd_ci __o;
16363 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
16364 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
16365 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
16366 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
16367 return ret;
16370 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
16371 vld4_s64 (const int64_t * __a)
16373 int64x1x4_t ret;
16374 __builtin_aarch64_simd_xi __o;
16375 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
16376 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16377 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16378 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16379 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16380 return ret;
16383 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
16384 vld4_u64 (const uint64_t * __a)
16386 uint64x1x4_t ret;
16387 __builtin_aarch64_simd_xi __o;
16388 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
16389 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16390 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16391 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16392 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16393 return ret;
16396 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
16397 vld4_f64 (const float64_t * __a)
16399 float64x1x4_t ret;
16400 __builtin_aarch64_simd_xi __o;
16401 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
16402 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
16403 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
16404 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
16405 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
16406 return ret;
16409 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
16410 vld4_s8 (const int8_t * __a)
16412 int8x8x4_t ret;
16413 __builtin_aarch64_simd_xi __o;
16414 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16415 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16416 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16417 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16418 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16419 return ret;
16422 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
16423 vld4_p8 (const poly8_t * __a)
16425 poly8x8x4_t ret;
16426 __builtin_aarch64_simd_xi __o;
16427 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16428 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16429 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16430 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16431 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16432 return ret;
16435 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
16436 vld4_s16 (const int16_t * __a)
16438 int16x4x4_t ret;
16439 __builtin_aarch64_simd_xi __o;
16440 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16441 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16442 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16443 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16444 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16445 return ret;
16448 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
16449 vld4_p16 (const poly16_t * __a)
16451 poly16x4x4_t ret;
16452 __builtin_aarch64_simd_xi __o;
16453 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16454 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16455 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16456 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16457 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16458 return ret;
16461 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
16462 vld4_s32 (const int32_t * __a)
16464 int32x2x4_t ret;
16465 __builtin_aarch64_simd_xi __o;
16466 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
16467 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16468 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16469 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16470 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16471 return ret;
16474 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
16475 vld4_u8 (const uint8_t * __a)
16477 uint8x8x4_t ret;
16478 __builtin_aarch64_simd_xi __o;
16479 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
16480 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16481 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16482 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16483 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16484 return ret;
16487 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
16488 vld4_u16 (const uint16_t * __a)
16490 uint16x4x4_t ret;
16491 __builtin_aarch64_simd_xi __o;
16492 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
16493 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16494 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16495 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16496 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16497 return ret;
16500 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
16501 vld4_u32 (const uint32_t * __a)
16503 uint32x2x4_t ret;
16504 __builtin_aarch64_simd_xi __o;
16505 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
16506 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16507 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16508 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16509 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16510 return ret;
16513 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
16514 vld4_f32 (const float32_t * __a)
16516 float32x2x4_t ret;
16517 __builtin_aarch64_simd_xi __o;
16518 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
16519 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
16520 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
16521 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
16522 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
16523 return ret;
16526 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
16527 vld4q_s8 (const int8_t * __a)
16529 int8x16x4_t ret;
16530 __builtin_aarch64_simd_xi __o;
16531 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16532 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16533 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16534 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16535 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16536 return ret;
16539 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
16540 vld4q_p8 (const poly8_t * __a)
16542 poly8x16x4_t ret;
16543 __builtin_aarch64_simd_xi __o;
16544 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16545 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16546 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16547 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16548 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16549 return ret;
16552 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
16553 vld4q_s16 (const int16_t * __a)
16555 int16x8x4_t ret;
16556 __builtin_aarch64_simd_xi __o;
16557 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16558 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16559 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16560 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16561 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16562 return ret;
16565 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16566 vld4q_p16 (const poly16_t * __a)
16568 poly16x8x4_t ret;
16569 __builtin_aarch64_simd_xi __o;
16570 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16571 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16572 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16573 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16574 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16575 return ret;
16578 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16579 vld4q_s32 (const int32_t * __a)
16581 int32x4x4_t ret;
16582 __builtin_aarch64_simd_xi __o;
16583 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16584 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16585 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16586 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16587 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16588 return ret;
16591 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16592 vld4q_s64 (const int64_t * __a)
16594 int64x2x4_t ret;
16595 __builtin_aarch64_simd_xi __o;
16596 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16597 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16598 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16599 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16600 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16601 return ret;
16604 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16605 vld4q_u8 (const uint8_t * __a)
16607 uint8x16x4_t ret;
16608 __builtin_aarch64_simd_xi __o;
16609 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
16610 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16611 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16612 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16613 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16614 return ret;
16617 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16618 vld4q_u16 (const uint16_t * __a)
16620 uint16x8x4_t ret;
16621 __builtin_aarch64_simd_xi __o;
16622 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
16623 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16624 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16625 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16626 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16627 return ret;
16630 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
16631 vld4q_u32 (const uint32_t * __a)
16633 uint32x4x4_t ret;
16634 __builtin_aarch64_simd_xi __o;
16635 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
16636 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16637 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16638 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16639 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16640 return ret;
16643 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
16644 vld4q_u64 (const uint64_t * __a)
16646 uint64x2x4_t ret;
16647 __builtin_aarch64_simd_xi __o;
16648 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
16649 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16650 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16651 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16652 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16653 return ret;
16656 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
16657 vld4q_f32 (const float32_t * __a)
16659 float32x4x4_t ret;
16660 __builtin_aarch64_simd_xi __o;
16661 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
16662 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
16663 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
16664 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
16665 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
16666 return ret;
16669 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
16670 vld4q_f64 (const float64_t * __a)
16672 float64x2x4_t ret;
16673 __builtin_aarch64_simd_xi __o;
16674 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
16675 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
16676 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
16677 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
16678 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
16679 return ret;
16682 /* vldn_dup */
16684 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
16685 vld2_dup_s8 (const int8_t * __a)
16687 int8x8x2_t ret;
16688 __builtin_aarch64_simd_oi __o;
16689 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16690 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16691 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16692 return ret;
16695 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
16696 vld2_dup_s16 (const int16_t * __a)
16698 int16x4x2_t ret;
16699 __builtin_aarch64_simd_oi __o;
16700 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16701 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16702 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16703 return ret;
16706 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
16707 vld2_dup_s32 (const int32_t * __a)
16709 int32x2x2_t ret;
16710 __builtin_aarch64_simd_oi __o;
16711 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16712 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16713 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16714 return ret;
16717 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
16718 vld2_dup_f32 (const float32_t * __a)
16720 float32x2x2_t ret;
16721 __builtin_aarch64_simd_oi __o;
16722 __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16723 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
16724 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
16725 return ret;
16728 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
16729 vld2_dup_f64 (const float64_t * __a)
16731 float64x1x2_t ret;
16732 __builtin_aarch64_simd_oi __o;
16733 __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
16734 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
16735 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
16736 return ret;
16739 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
16740 vld2_dup_u8 (const uint8_t * __a)
16742 uint8x8x2_t ret;
16743 __builtin_aarch64_simd_oi __o;
16744 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16745 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16746 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16747 return ret;
16750 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
16751 vld2_dup_u16 (const uint16_t * __a)
16753 uint16x4x2_t ret;
16754 __builtin_aarch64_simd_oi __o;
16755 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16756 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16757 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16758 return ret;
16761 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
16762 vld2_dup_u32 (const uint32_t * __a)
16764 uint32x2x2_t ret;
16765 __builtin_aarch64_simd_oi __o;
16766 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16767 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16768 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16769 return ret;
16772 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
16773 vld2_dup_p8 (const poly8_t * __a)
16775 poly8x8x2_t ret;
16776 __builtin_aarch64_simd_oi __o;
16777 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16778 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16779 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16780 return ret;
16783 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
16784 vld2_dup_p16 (const poly16_t * __a)
16786 poly16x4x2_t ret;
16787 __builtin_aarch64_simd_oi __o;
16788 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16789 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16790 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16791 return ret;
16794 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
16795 vld2_dup_s64 (const int64_t * __a)
16797 int64x1x2_t ret;
16798 __builtin_aarch64_simd_oi __o;
16799 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16800 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16801 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16802 return ret;
16805 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
16806 vld2_dup_u64 (const uint64_t * __a)
16808 uint64x1x2_t ret;
16809 __builtin_aarch64_simd_oi __o;
16810 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16811 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16812 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16813 return ret;
16816 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
16817 vld2q_dup_s8 (const int8_t * __a)
16819 int8x16x2_t ret;
16820 __builtin_aarch64_simd_oi __o;
16821 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16822 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16823 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16824 return ret;
16827 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
16828 vld2q_dup_p8 (const poly8_t * __a)
16830 poly8x16x2_t ret;
16831 __builtin_aarch64_simd_oi __o;
16832 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16833 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16834 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16835 return ret;
16838 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
16839 vld2q_dup_s16 (const int16_t * __a)
16841 int16x8x2_t ret;
16842 __builtin_aarch64_simd_oi __o;
16843 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16844 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16845 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16846 return ret;
16849 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
16850 vld2q_dup_p16 (const poly16_t * __a)
16852 poly16x8x2_t ret;
16853 __builtin_aarch64_simd_oi __o;
16854 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16855 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16856 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16857 return ret;
16860 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
16861 vld2q_dup_s32 (const int32_t * __a)
16863 int32x4x2_t ret;
16864 __builtin_aarch64_simd_oi __o;
16865 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16866 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16867 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16868 return ret;
16871 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16872 vld2q_dup_s64 (const int64_t * __a)
16874 int64x2x2_t ret;
16875 __builtin_aarch64_simd_oi __o;
16876 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16877 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16878 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16879 return ret;
16882 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16883 vld2q_dup_u8 (const uint8_t * __a)
16885 uint8x16x2_t ret;
16886 __builtin_aarch64_simd_oi __o;
16887 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16888 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16889 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16890 return ret;
16893 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16894 vld2q_dup_u16 (const uint16_t * __a)
16896 uint16x8x2_t ret;
16897 __builtin_aarch64_simd_oi __o;
16898 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16899 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16900 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16901 return ret;
16904 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16905 vld2q_dup_u32 (const uint32_t * __a)
16907 uint32x4x2_t ret;
16908 __builtin_aarch64_simd_oi __o;
16909 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16910 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16911 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16912 return ret;
16915 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16916 vld2q_dup_u64 (const uint64_t * __a)
16918 uint64x2x2_t ret;
16919 __builtin_aarch64_simd_oi __o;
16920 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16921 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16922 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16923 return ret;
16926 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16927 vld2q_dup_f32 (const float32_t * __a)
16929 float32x4x2_t ret;
16930 __builtin_aarch64_simd_oi __o;
16931 __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16932 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16933 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16934 return ret;
16937 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16938 vld2q_dup_f64 (const float64_t * __a)
16940 float64x2x2_t ret;
16941 __builtin_aarch64_simd_oi __o;
16942 __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
16943 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16944 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16945 return ret;
16948 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16949 vld3_dup_s64 (const int64_t * __a)
16951 int64x1x3_t ret;
16952 __builtin_aarch64_simd_ci __o;
16953 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16954 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16955 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16956 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16957 return ret;
16960 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16961 vld3_dup_u64 (const uint64_t * __a)
16963 uint64x1x3_t ret;
16964 __builtin_aarch64_simd_ci __o;
16965 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16966 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16967 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16968 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16969 return ret;
16972 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16973 vld3_dup_f64 (const float64_t * __a)
16975 float64x1x3_t ret;
16976 __builtin_aarch64_simd_ci __o;
16977 __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
16978 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16979 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16980 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16981 return ret;
16984 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16985 vld3_dup_s8 (const int8_t * __a)
16987 int8x8x3_t ret;
16988 __builtin_aarch64_simd_ci __o;
16989 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16990 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16991 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16992 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16993 return ret;
16996 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16997 vld3_dup_p8 (const poly8_t * __a)
16999 poly8x8x3_t ret;
17000 __builtin_aarch64_simd_ci __o;
17001 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17002 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17003 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17004 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17005 return ret;
17008 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
17009 vld3_dup_s16 (const int16_t * __a)
17011 int16x4x3_t ret;
17012 __builtin_aarch64_simd_ci __o;
17013 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17014 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17015 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17016 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17017 return ret;
17020 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
17021 vld3_dup_p16 (const poly16_t * __a)
17023 poly16x4x3_t ret;
17024 __builtin_aarch64_simd_ci __o;
17025 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17026 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17027 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17028 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17029 return ret;
17032 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
17033 vld3_dup_s32 (const int32_t * __a)
17035 int32x2x3_t ret;
17036 __builtin_aarch64_simd_ci __o;
17037 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
17038 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17039 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17040 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17041 return ret;
17044 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
17045 vld3_dup_u8 (const uint8_t * __a)
17047 uint8x8x3_t ret;
17048 __builtin_aarch64_simd_ci __o;
17049 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17050 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17051 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17052 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17053 return ret;
17056 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
17057 vld3_dup_u16 (const uint16_t * __a)
17059 uint16x4x3_t ret;
17060 __builtin_aarch64_simd_ci __o;
17061 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17062 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17063 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17064 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17065 return ret;
17068 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
17069 vld3_dup_u32 (const uint32_t * __a)
17071 uint32x2x3_t ret;
17072 __builtin_aarch64_simd_ci __o;
17073 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
17074 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17075 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17076 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17077 return ret;
17080 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
17081 vld3_dup_f32 (const float32_t * __a)
17083 float32x2x3_t ret;
17084 __builtin_aarch64_simd_ci __o;
17085 __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
17086 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
17087 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
17088 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
17089 return ret;
17092 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
17093 vld3q_dup_s8 (const int8_t * __a)
17095 int8x16x3_t ret;
17096 __builtin_aarch64_simd_ci __o;
17097 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17098 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17099 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17100 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17101 return ret;
17104 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
17105 vld3q_dup_p8 (const poly8_t * __a)
17107 poly8x16x3_t ret;
17108 __builtin_aarch64_simd_ci __o;
17109 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17110 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17111 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17112 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17113 return ret;
17116 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
17117 vld3q_dup_s16 (const int16_t * __a)
17119 int16x8x3_t ret;
17120 __builtin_aarch64_simd_ci __o;
17121 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17122 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17123 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17124 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17125 return ret;
17128 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
17129 vld3q_dup_p16 (const poly16_t * __a)
17131 poly16x8x3_t ret;
17132 __builtin_aarch64_simd_ci __o;
17133 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17134 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17135 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17136 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17137 return ret;
17140 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
17141 vld3q_dup_s32 (const int32_t * __a)
17143 int32x4x3_t ret;
17144 __builtin_aarch64_simd_ci __o;
17145 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
17146 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17147 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17148 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17149 return ret;
17152 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
17153 vld3q_dup_s64 (const int64_t * __a)
17155 int64x2x3_t ret;
17156 __builtin_aarch64_simd_ci __o;
17157 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
17158 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17159 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17160 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17161 return ret;
17164 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
17165 vld3q_dup_u8 (const uint8_t * __a)
17167 uint8x16x3_t ret;
17168 __builtin_aarch64_simd_ci __o;
17169 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17170 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17171 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17172 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17173 return ret;
17176 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
17177 vld3q_dup_u16 (const uint16_t * __a)
17179 uint16x8x3_t ret;
17180 __builtin_aarch64_simd_ci __o;
17181 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17182 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17183 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17184 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17185 return ret;
17188 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
17189 vld3q_dup_u32 (const uint32_t * __a)
17191 uint32x4x3_t ret;
17192 __builtin_aarch64_simd_ci __o;
17193 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
17194 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17195 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17196 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17197 return ret;
17200 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
17201 vld3q_dup_u64 (const uint64_t * __a)
17203 uint64x2x3_t ret;
17204 __builtin_aarch64_simd_ci __o;
17205 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
17206 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17207 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17208 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17209 return ret;
17212 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17213 vld3q_dup_f32 (const float32_t * __a)
17215 float32x4x3_t ret;
17216 __builtin_aarch64_simd_ci __o;
17217 __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17218 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17219 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17220 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17221 return ret;
17224 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17225 vld3q_dup_f64 (const float64_t * __a)
17227 float64x2x3_t ret;
17228 __builtin_aarch64_simd_ci __o;
17229 __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
17230 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17231 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17232 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17233 return ret;
17236 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17237 vld4_dup_s64 (const int64_t * __a)
17239 int64x1x4_t ret;
17240 __builtin_aarch64_simd_xi __o;
17241 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
17242 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17243 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17244 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17245 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17246 return ret;
17249 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17250 vld4_dup_u64 (const uint64_t * __a)
17252 uint64x1x4_t ret;
17253 __builtin_aarch64_simd_xi __o;
17254 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
17255 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17256 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17257 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17258 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17259 return ret;
17262 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17263 vld4_dup_f64 (const float64_t * __a)
17265 float64x1x4_t ret;
17266 __builtin_aarch64_simd_xi __o;
17267 __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
17268 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17269 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17270 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17271 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17272 return ret;
17275 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17276 vld4_dup_s8 (const int8_t * __a)
17278 int8x8x4_t ret;
17279 __builtin_aarch64_simd_xi __o;
17280 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17281 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17282 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17283 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17284 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17285 return ret;
17288 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17289 vld4_dup_p8 (const poly8_t * __a)
17291 poly8x8x4_t ret;
17292 __builtin_aarch64_simd_xi __o;
17293 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17294 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17295 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17296 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17297 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17298 return ret;
17301 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
17302 vld4_dup_s16 (const int16_t * __a)
17304 int16x4x4_t ret;
17305 __builtin_aarch64_simd_xi __o;
17306 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17307 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17308 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17309 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17310 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17311 return ret;
17314 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
17315 vld4_dup_p16 (const poly16_t * __a)
17317 poly16x4x4_t ret;
17318 __builtin_aarch64_simd_xi __o;
17319 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17320 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17321 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17322 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17323 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17324 return ret;
17327 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
17328 vld4_dup_s32 (const int32_t * __a)
17330 int32x2x4_t ret;
17331 __builtin_aarch64_simd_xi __o;
17332 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
17333 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17334 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17335 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17336 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17337 return ret;
17340 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
17341 vld4_dup_u8 (const uint8_t * __a)
17343 uint8x8x4_t ret;
17344 __builtin_aarch64_simd_xi __o;
17345 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
17346 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17347 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17348 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17349 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17350 return ret;
17353 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
17354 vld4_dup_u16 (const uint16_t * __a)
17356 uint16x4x4_t ret;
17357 __builtin_aarch64_simd_xi __o;
17358 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
17359 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17360 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17361 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17362 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17363 return ret;
17366 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
17367 vld4_dup_u32 (const uint32_t * __a)
17369 uint32x2x4_t ret;
17370 __builtin_aarch64_simd_xi __o;
17371 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
17372 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17373 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17374 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17375 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17376 return ret;
17379 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
17380 vld4_dup_f32 (const float32_t * __a)
17382 float32x2x4_t ret;
17383 __builtin_aarch64_simd_xi __o;
17384 __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
17385 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
17386 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
17387 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
17388 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
17389 return ret;
17392 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
17393 vld4q_dup_s8 (const int8_t * __a)
17395 int8x16x4_t ret;
17396 __builtin_aarch64_simd_xi __o;
17397 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17398 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17399 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17400 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17401 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17402 return ret;
17405 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
17406 vld4q_dup_p8 (const poly8_t * __a)
17408 poly8x16x4_t ret;
17409 __builtin_aarch64_simd_xi __o;
17410 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17411 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17412 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17413 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17414 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17415 return ret;
17418 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
17419 vld4q_dup_s16 (const int16_t * __a)
17421 int16x8x4_t ret;
17422 __builtin_aarch64_simd_xi __o;
17423 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17424 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17425 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17426 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17427 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17428 return ret;
17431 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
17432 vld4q_dup_p16 (const poly16_t * __a)
17434 poly16x8x4_t ret;
17435 __builtin_aarch64_simd_xi __o;
17436 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17437 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17438 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17439 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17440 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17441 return ret;
17444 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
17445 vld4q_dup_s32 (const int32_t * __a)
17447 int32x4x4_t ret;
17448 __builtin_aarch64_simd_xi __o;
17449 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
17450 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17451 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17452 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17453 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17454 return ret;
17457 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
17458 vld4q_dup_s64 (const int64_t * __a)
17460 int64x2x4_t ret;
17461 __builtin_aarch64_simd_xi __o;
17462 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
17463 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17464 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17465 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17466 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17467 return ret;
17470 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
17471 vld4q_dup_u8 (const uint8_t * __a)
17473 uint8x16x4_t ret;
17474 __builtin_aarch64_simd_xi __o;
17475 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
17476 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17477 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17478 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17479 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17480 return ret;
17483 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
17484 vld4q_dup_u16 (const uint16_t * __a)
17486 uint16x8x4_t ret;
17487 __builtin_aarch64_simd_xi __o;
17488 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
17489 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17490 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17491 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17492 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17493 return ret;
17496 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17497 vld4q_dup_u32 (const uint32_t * __a)
17499 uint32x4x4_t ret;
17500 __builtin_aarch64_simd_xi __o;
17501 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
17502 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17503 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17504 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17505 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17506 return ret;
17509 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17510 vld4q_dup_u64 (const uint64_t * __a)
17512 uint64x2x4_t ret;
17513 __builtin_aarch64_simd_xi __o;
17514 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
17515 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17516 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17517 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17518 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17519 return ret;
17522 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17523 vld4q_dup_f32 (const float32_t * __a)
17525 float32x4x4_t ret;
17526 __builtin_aarch64_simd_xi __o;
17527 __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
17528 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17529 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17530 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17531 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17532 return ret;
17535 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17536 vld4q_dup_f64 (const float64_t * __a)
17538 float64x2x4_t ret;
17539 __builtin_aarch64_simd_xi __o;
17540 __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
17541 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17542 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17543 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17544 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17545 return ret;
17548 /* vld2_lane */
17550 #define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, \
17551 mode, ptrmode, funcsuffix, signedtype) \
17552 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17553 vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17555 __builtin_aarch64_simd_oi __o; \
17556 largetype __temp; \
17557 __temp.val[0] = \
17558 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17559 __temp.val[1] = \
17560 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17561 __o = __builtin_aarch64_set_qregoi##mode (__o, \
17562 (signedtype) __temp.val[0], \
17563 0); \
17564 __o = __builtin_aarch64_set_qregoi##mode (__o, \
17565 (signedtype) __temp.val[1], \
17566 1); \
17567 __o = __builtin_aarch64_ld2_lane##mode ( \
17568 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17569 __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \
17570 __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \
17571 return __b; \
17574 __LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf,
17575 sf, f32, float32x4_t)
17576 __LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, v2df,
17577 df, f64, float64x2_t)
17578 __LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8,
17579 int8x16_t)
17580 __LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi,
17581 p16, int16x8_t)
17582 __LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8,
17583 int8x16_t)
17584 __LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16,
17585 int16x8_t)
17586 __LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32,
17587 int32x4_t)
17588 __LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64,
17589 int64x2_t)
17590 __LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8,
17591 int8x16_t)
17592 __LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi,
17593 u16, int16x8_t)
17594 __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si,
17595 u32, int32x4_t)
17596 __LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di,
17597 u64, int64x2_t)
17599 #undef __LD2_LANE_FUNC
17601 /* vld2q_lane */
17603 #define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17604 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17605 vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17607 __builtin_aarch64_simd_oi __o; \
17608 intype ret; \
17609 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
17610 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
17611 __o = __builtin_aarch64_ld2_lane##mode ( \
17612 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17613 ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \
17614 ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \
17615 return ret; \
17618 __LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
17619 __LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
17620 __LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17621 __LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17622 __LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
17623 __LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
17624 __LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
17625 __LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
17626 __LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17627 __LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17628 __LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
17629 __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
17631 #undef __LD2_LANE_FUNC
17633 /* vld3_lane */
/* __LD3_LANE_FUNC: expands to the definition of vld3_lane_<funcsuffix>,
   which loads one 3-element structure from *__PTR into lane __C of the
   three 64-bit vectors of __B, leaving every other lane unchanged.

   The ld3_lane builtin operates only on a 128-bit (q-register) CI-mode
   tuple, so each 64-bit input vector is first widened with vcombine
   (upper half zeroed via vcreate (0)), packed into the tuple, loaded
   into, and then the low 64-bit halves are extracted again for the
   result.  SIGNEDTYPE is the 128-bit signed vector type accepted by the
   tuple-set builtin; the casts through it are bit-casts only.  */
#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_ci __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __temp.val[2] =							   \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
					    (signedtype) __temp.val[2],	   \
					    2);				   \
  __o = __builtin_aarch64_ld3_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);	   \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);	   \
  return __b;								   \
}

/* One instantiation per 64-bit vector element type; the third column is
   the 128-bit structure type used as scratch space.  */
__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD3_LANE_FUNC
/* vld3q_lane */

/* __LD3_LANE_FUNC (q-register form): expands to the definition of
   vld3q_lane_<funcsuffix>, which loads one 3-element structure from
   *__PTR into lane __C of the three 128-bit vectors of __B.  The vectors
   are moved through a CI-mode tuple via bit-casts to int32x4_t, the one
   128-bit type the v4si set/get tuple builtins accept.  */
#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__)) \
vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_ci __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_ld3_lane##mode (				   \
	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1);	   \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2);	   \
  return ret;								   \
}

/* One instantiation per 128-bit vector element type.  */
__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD3_LANE_FUNC
/* vld4_lane */

/* __LD4_LANE_FUNC: expands to the definition of vld4_lane_<funcsuffix>,
   which loads one 4-element structure from *__PTR into lane __C of the
   four 64-bit vectors of __B, leaving every other lane unchanged.

   As with vld3_lane above, the ld4_lane builtin only operates on a
   128-bit (q-register) XI-mode tuple, so each 64-bit input vector is
   widened with vcombine (upper half zeroed), packed into the tuple,
   loaded into, and the low 64-bit halves extracted for the result.
   SIGNEDTYPE is the 128-bit signed vector type accepted by the
   tuple-set builtin; the casts through it are bit-casts only.  */
#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
			mode, ptrmode, funcsuffix, signedtype)		   \
__extension__ static __inline intype __attribute__ ((__always_inline__))   \
vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
{									   \
  __builtin_aarch64_simd_xi __o;					   \
  largetype __temp;							   \
  __temp.val[0] =							   \
    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
  __temp.val[1] =							   \
    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
  __temp.val[2] =							   \
    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
  __temp.val[3] =							   \
    vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0));	   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[0],	   \
					    0);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[1],	   \
					    1);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[2],	   \
					    2);				   \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
					    (signedtype) __temp.val[3],	   \
					    3);				   \
  __o = __builtin_aarch64_ld4_lane##mode (				   \
	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
  __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0);	   \
  __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1);	   \
  __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2);	   \
  __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3);	   \
  return __b;								   \
}

/* Instantiations of the 64-bit (d-register) vld4_lane forms, one per
   element type.  */
__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf,
		 sf, f32, float32x4_t)
__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, v2df,
		 df, f64, float64x2_t)
__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8,
		 int8x16_t)
__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi,
		 p16, int16x8_t)
__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8,
		 int8x16_t)
__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16,
		 int16x8_t)
__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32,
		 int32x4_t)
__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64,
		 int64x2_t)
__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8,
		 int8x16_t)
__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi,
		 u16, int16x8_t)
__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si,
		 u32, int32x4_t)
__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di,
		 u64, int64x2_t)

#undef __LD4_LANE_FUNC
/* vld4q_lane */

/* __LD4_LANE_FUNC (q-register form): expands to the definition of
   vld4q_lane_<funcsuffix>, which loads one 4-element structure from
   *__PTR into lane __C of the four 128-bit vectors of __B.  The vectors
   are moved through an XI-mode tuple via bit-casts to int32x4_t, the one
   128-bit type the v4si set/get tuple builtins accept.  */
#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
__extension__ static __inline intype __attribute__ ((__always_inline__)) \
vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
{									   \
  __builtin_aarch64_simd_xi __o;					   \
  intype ret;								   \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
  __o = __builtin_aarch64_ld4_lane##mode (				   \
	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
  ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0);	   \
  ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1);	   \
  ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2);	   \
  ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3);	   \
  return ret;								   \
}

/* One instantiation per 128-bit vector element type.  */
__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)

#undef __LD4_LANE_FUNC
17829 /* vmax */
17831 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17832 vmax_f32 (float32x2_t __a, float32x2_t __b)
17834 return __builtin_aarch64_smax_nanv2sf (__a, __b);
17837 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17838 vmax_s8 (int8x8_t __a, int8x8_t __b)
17840 return __builtin_aarch64_smaxv8qi (__a, __b);
17843 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17844 vmax_s16 (int16x4_t __a, int16x4_t __b)
17846 return __builtin_aarch64_smaxv4hi (__a, __b);
17849 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17850 vmax_s32 (int32x2_t __a, int32x2_t __b)
17852 return __builtin_aarch64_smaxv2si (__a, __b);
17855 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17856 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17858 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17859 (int8x8_t) __b);
17862 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17863 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17865 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17866 (int16x4_t) __b);
17869 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17870 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17872 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17873 (int32x2_t) __b);
17876 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17877 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17879 return __builtin_aarch64_smax_nanv4sf (__a, __b);
17882 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17883 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
17885 return __builtin_aarch64_smax_nanv2df (__a, __b);
17888 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17889 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
17891 return __builtin_aarch64_smaxv16qi (__a, __b);
17894 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17895 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
17897 return __builtin_aarch64_smaxv8hi (__a, __b);
17900 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17901 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
17903 return __builtin_aarch64_smaxv4si (__a, __b);
17906 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17907 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
17909 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
17910 (int8x16_t) __b);
17913 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17914 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
17916 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
17917 (int16x8_t) __b);
17920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17921 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
17923 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
17924 (int32x4_t) __b);
17927 /* vmaxnm */
17929 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17930 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
17932 return __builtin_aarch64_smaxv2sf (__a, __b);
17935 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17936 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
17938 return __builtin_aarch64_smaxv4sf (__a, __b);
17941 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17942 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
17944 return __builtin_aarch64_smaxv2df (__a, __b);
17947 /* vmaxv */
17949 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17950 vmaxv_f32 (float32x2_t __a)
17952 return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
17955 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17956 vmaxv_s8 (int8x8_t __a)
17958 return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
17961 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17962 vmaxv_s16 (int16x4_t __a)
17964 return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
17967 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17968 vmaxv_s32 (int32x2_t __a)
17970 return __builtin_aarch64_reduc_smax_scal_v2si (__a);
17973 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17974 vmaxv_u8 (uint8x8_t __a)
17976 return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
17979 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17980 vmaxv_u16 (uint16x4_t __a)
17982 return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
17985 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17986 vmaxv_u32 (uint32x2_t __a)
17988 return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
17991 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17992 vmaxvq_f32 (float32x4_t __a)
17994 return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
17997 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17998 vmaxvq_f64 (float64x2_t __a)
18000 return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
18003 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18004 vmaxvq_s8 (int8x16_t __a)
18006 return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
18009 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18010 vmaxvq_s16 (int16x8_t __a)
18012 return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
18015 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18016 vmaxvq_s32 (int32x4_t __a)
18018 return __builtin_aarch64_reduc_smax_scal_v4si (__a);
18021 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18022 vmaxvq_u8 (uint8x16_t __a)
18024 return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
18027 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18028 vmaxvq_u16 (uint16x8_t __a)
18030 return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
18033 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18034 vmaxvq_u32 (uint32x4_t __a)
18036 return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
18039 /* vmaxnmv */
18041 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18042 vmaxnmv_f32 (float32x2_t __a)
18044 return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
18047 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18048 vmaxnmvq_f32 (float32x4_t __a)
18050 return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
18053 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18054 vmaxnmvq_f64 (float64x2_t __a)
18056 return __builtin_aarch64_reduc_smax_scal_v2df (__a);
18059 /* vmin */
18061 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18062 vmin_f32 (float32x2_t __a, float32x2_t __b)
18064 return __builtin_aarch64_smin_nanv2sf (__a, __b);
18067 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18068 vmin_s8 (int8x8_t __a, int8x8_t __b)
18070 return __builtin_aarch64_sminv8qi (__a, __b);
18073 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18074 vmin_s16 (int16x4_t __a, int16x4_t __b)
18076 return __builtin_aarch64_sminv4hi (__a, __b);
18079 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18080 vmin_s32 (int32x2_t __a, int32x2_t __b)
18082 return __builtin_aarch64_sminv2si (__a, __b);
18085 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18086 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
18088 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
18089 (int8x8_t) __b);
18092 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18093 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
18095 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
18096 (int16x4_t) __b);
18099 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18100 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
18102 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
18103 (int32x2_t) __b);
18106 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18107 vminq_f32 (float32x4_t __a, float32x4_t __b)
18109 return __builtin_aarch64_smin_nanv4sf (__a, __b);
18112 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18113 vminq_f64 (float64x2_t __a, float64x2_t __b)
18115 return __builtin_aarch64_smin_nanv2df (__a, __b);
18118 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18119 vminq_s8 (int8x16_t __a, int8x16_t __b)
18121 return __builtin_aarch64_sminv16qi (__a, __b);
18124 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18125 vminq_s16 (int16x8_t __a, int16x8_t __b)
18127 return __builtin_aarch64_sminv8hi (__a, __b);
18130 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18131 vminq_s32 (int32x4_t __a, int32x4_t __b)
18133 return __builtin_aarch64_sminv4si (__a, __b);
18136 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18137 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
18139 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
18140 (int8x16_t) __b);
18143 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18144 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
18146 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
18147 (int16x8_t) __b);
18150 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18151 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
18153 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
18154 (int32x4_t) __b);
18157 /* vminnm */
18159 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18160 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18162 return __builtin_aarch64_sminv2sf (__a, __b);
18165 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18166 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18168 return __builtin_aarch64_sminv4sf (__a, __b);
18171 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18172 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18174 return __builtin_aarch64_sminv2df (__a, __b);
18177 /* vminv */
18179 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18180 vminv_f32 (float32x2_t __a)
18182 return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
18185 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18186 vminv_s8 (int8x8_t __a)
18188 return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
18191 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18192 vminv_s16 (int16x4_t __a)
18194 return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
18197 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18198 vminv_s32 (int32x2_t __a)
18200 return __builtin_aarch64_reduc_smin_scal_v2si (__a);
18203 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18204 vminv_u8 (uint8x8_t __a)
18206 return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
18209 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18210 vminv_u16 (uint16x4_t __a)
18212 return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
18215 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18216 vminv_u32 (uint32x2_t __a)
18218 return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
18221 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18222 vminvq_f32 (float32x4_t __a)
18224 return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
18227 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18228 vminvq_f64 (float64x2_t __a)
18230 return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
18233 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18234 vminvq_s8 (int8x16_t __a)
18236 return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
18239 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18240 vminvq_s16 (int16x8_t __a)
18242 return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
18245 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18246 vminvq_s32 (int32x4_t __a)
18248 return __builtin_aarch64_reduc_smin_scal_v4si (__a);
18251 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18252 vminvq_u8 (uint8x16_t __a)
18254 return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
18257 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18258 vminvq_u16 (uint16x8_t __a)
18260 return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
18263 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18264 vminvq_u32 (uint32x4_t __a)
18266 return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
18269 /* vminnmv */
18271 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18272 vminnmv_f32 (float32x2_t __a)
18274 return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
18277 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18278 vminnmvq_f32 (float32x4_t __a)
18280 return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
18283 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18284 vminnmvq_f64 (float64x2_t __a)
18286 return __builtin_aarch64_reduc_smin_scal_v2df (__a);
18289 /* vmla */
18291 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18292 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18294 return a + b * c;
18297 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18298 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18300 return __a + __b * __c;
18303 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18304 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18306 return a + b * c;
18309 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18310 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18312 return a + b * c;
18315 /* vmla_lane */
18317 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18318 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18319 float32x2_t __c, const int __lane)
18321 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18324 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18325 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18326 int16x4_t __c, const int __lane)
18328 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18331 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18332 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18333 int32x2_t __c, const int __lane)
18335 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18338 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18339 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18340 uint16x4_t __c, const int __lane)
18342 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18345 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18346 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18347 uint32x2_t __c, const int __lane)
18349 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18352 /* vmla_laneq */
18354 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18355 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18356 float32x4_t __c, const int __lane)
18358 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18361 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18362 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18363 int16x8_t __c, const int __lane)
18365 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18368 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18369 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18370 int32x4_t __c, const int __lane)
18372 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18375 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18376 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18377 uint16x8_t __c, const int __lane)
18379 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18382 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18383 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18384 uint32x4_t __c, const int __lane)
18386 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18389 /* vmlaq_lane */
18391 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18392 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18393 float32x2_t __c, const int __lane)
18395 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18398 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18399 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18400 int16x4_t __c, const int __lane)
18402 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18406 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18407 int32x2_t __c, const int __lane)
18409 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18412 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18413 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18414 uint16x4_t __c, const int __lane)
18416 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18419 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18420 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18421 uint32x2_t __c, const int __lane)
18423 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18426 /* vmlaq_laneq */
18428 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18429 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18430 float32x4_t __c, const int __lane)
18432 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18435 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18436 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18437 int16x8_t __c, const int __lane)
18439 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18442 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18443 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18444 int32x4_t __c, const int __lane)
18446 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18449 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18450 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18451 uint16x8_t __c, const int __lane)
18453 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18456 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18457 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18458 uint32x4_t __c, const int __lane)
18460 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18463 /* vmls */
18465 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18466 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18468 return a - b * c;
18471 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18472 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18474 return __a - __b * __c;
18477 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18478 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18480 return a - b * c;
18483 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18484 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18486 return a - b * c;
18489 /* vmls_lane */
18491 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18492 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18493 float32x2_t __c, const int __lane)
18495 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18498 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18499 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18500 int16x4_t __c, const int __lane)
18502 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18505 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18506 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18507 int32x2_t __c, const int __lane)
18509 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18513 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18514 uint16x4_t __c, const int __lane)
18516 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18519 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18520 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18521 uint32x2_t __c, const int __lane)
18523 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18526 /* vmls_laneq */
18528 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18529 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18530 float32x4_t __c, const int __lane)
18532 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18535 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18536 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18537 int16x8_t __c, const int __lane)
18539 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18542 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18543 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18544 int32x4_t __c, const int __lane)
18546 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18549 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18550 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18551 uint16x8_t __c, const int __lane)
18553 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18556 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18557 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18558 uint32x4_t __c, const int __lane)
18560 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18563 /* vmlsq_lane */
18565 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18566 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18567 float32x2_t __c, const int __lane)
18569 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18572 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18573 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18574 int16x4_t __c, const int __lane)
18576 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18579 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18580 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18581 int32x2_t __c, const int __lane)
18583 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18586 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18587 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18588 uint16x4_t __c, const int __lane)
18590 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18593 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18594 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18595 uint32x2_t __c, const int __lane)
18597 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18600 /* vmlsq_laneq */
18602 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18603 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18604 float32x4_t __c, const int __lane)
18606 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18609 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18610 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18611 int16x8_t __c, const int __lane)
18613 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18616 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18617 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18618 int32x4_t __c, const int __lane)
18620 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18622 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18623 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18624 uint16x8_t __c, const int __lane)
18626 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18629 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18630 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18631 uint32x4_t __c, const int __lane)
18633 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18636 /* vmov_n_ */
18638 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18639 vmov_n_f32 (float32_t __a)
18641 return vdup_n_f32 (__a);
18644 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18645 vmov_n_f64 (float64_t __a)
18647 return (float64x1_t) {__a};
18650 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18651 vmov_n_p8 (poly8_t __a)
18653 return vdup_n_p8 (__a);
18656 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18657 vmov_n_p16 (poly16_t __a)
18659 return vdup_n_p16 (__a);
18662 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18663 vmov_n_s8 (int8_t __a)
18665 return vdup_n_s8 (__a);
18668 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18669 vmov_n_s16 (int16_t __a)
18671 return vdup_n_s16 (__a);
18674 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18675 vmov_n_s32 (int32_t __a)
18677 return vdup_n_s32 (__a);
18680 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18681 vmov_n_s64 (int64_t __a)
18683 return (int64x1_t) {__a};
18686 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18687 vmov_n_u8 (uint8_t __a)
18689 return vdup_n_u8 (__a);
18692 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18693 vmov_n_u16 (uint16_t __a)
18695 return vdup_n_u16 (__a);
18698 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18699 vmov_n_u32 (uint32_t __a)
18701 return vdup_n_u32 (__a);
18704 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18705 vmov_n_u64 (uint64_t __a)
18707 return (uint64x1_t) {__a};
18710 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18711 vmovq_n_f32 (float32_t __a)
18713 return vdupq_n_f32 (__a);
18716 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18717 vmovq_n_f64 (float64_t __a)
18719 return vdupq_n_f64 (__a);
18722 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18723 vmovq_n_p8 (poly8_t __a)
18725 return vdupq_n_p8 (__a);
18728 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18729 vmovq_n_p16 (poly16_t __a)
18731 return vdupq_n_p16 (__a);
18734 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18735 vmovq_n_s8 (int8_t __a)
18737 return vdupq_n_s8 (__a);
18740 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18741 vmovq_n_s16 (int16_t __a)
18743 return vdupq_n_s16 (__a);
18746 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18747 vmovq_n_s32 (int32_t __a)
18749 return vdupq_n_s32 (__a);
18752 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18753 vmovq_n_s64 (int64_t __a)
18755 return vdupq_n_s64 (__a);
18758 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18759 vmovq_n_u8 (uint8_t __a)
18761 return vdupq_n_u8 (__a);
18764 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18765 vmovq_n_u16 (uint16_t __a)
18767 return vdupq_n_u16 (__a);
18770 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18771 vmovq_n_u32 (uint32_t __a)
18773 return vdupq_n_u32 (__a);
18776 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18777 vmovq_n_u64 (uint64_t __a)
18779 return vdupq_n_u64 (__a);
18782 /* vmul_lane */
18784 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18785 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18787 return __a * __aarch64_vget_lane_any (__b, __lane);
18790 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18791 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18793 return __a * __b;
18796 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18797 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18799 return __a * __aarch64_vget_lane_any (__b, __lane);
18802 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18803 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18805 return __a * __aarch64_vget_lane_any (__b, __lane);
18808 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18809 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18811 return __a * __aarch64_vget_lane_any (__b, __lane);
18814 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18815 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18817 return __a * __aarch64_vget_lane_any (__b, __lane);
18820 /* vmuld_lane */
18822 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18823 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18825 return __a * __aarch64_vget_lane_any (__b, __lane);
18828 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18829 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18831 return __a * __aarch64_vget_lane_any (__b, __lane);
18834 /* vmuls_lane */
18836 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18837 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18839 return __a * __aarch64_vget_lane_any (__b, __lane);
18842 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18843 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18845 return __a * __aarch64_vget_lane_any (__b, __lane);
18848 /* vmul_laneq */
18850 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18851 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
18853 return __a * __aarch64_vget_lane_any (__b, __lane);
18856 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18857 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
18859 return __a * __aarch64_vget_lane_any (__b, __lane);
18862 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18863 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
18865 return __a * __aarch64_vget_lane_any (__b, __lane);
18868 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18869 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
18871 return __a * __aarch64_vget_lane_any (__b, __lane);
18874 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18875 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
18877 return __a * __aarch64_vget_lane_any (__b, __lane);
18880 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18881 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
18883 return __a * __aarch64_vget_lane_any (__b, __lane);
18886 /* vmul_n */
18888 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18889 vmul_n_f64 (float64x1_t __a, float64_t __b)
18891 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
18894 /* vmulq_lane */
18896 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18897 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
18899 return __a * __aarch64_vget_lane_any (__b, __lane);
18902 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18903 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
18905 __AARCH64_LANE_CHECK (__a, __lane);
18906 return __a * __b[0];
18909 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18910 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
18912 return __a * __aarch64_vget_lane_any (__b, __lane);
18915 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18916 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
18918 return __a * __aarch64_vget_lane_any (__b, __lane);
18921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18922 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
18924 return __a * __aarch64_vget_lane_any (__b, __lane);
18927 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18928 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
18930 return __a * __aarch64_vget_lane_any (__b, __lane);
18933 /* vmulq_laneq */
18935 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18936 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
18938 return __a * __aarch64_vget_lane_any (__b, __lane);
18941 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18942 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
18944 return __a * __aarch64_vget_lane_any (__b, __lane);
18947 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18948 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
18950 return __a * __aarch64_vget_lane_any (__b, __lane);
18953 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18954 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
18956 return __a * __aarch64_vget_lane_any (__b, __lane);
18959 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18960 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
18962 return __a * __aarch64_vget_lane_any (__b, __lane);
18965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18966 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
18968 return __a * __aarch64_vget_lane_any (__b, __lane);
18971 /* vneg */
18973 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18974 vneg_f32 (float32x2_t __a)
18976 return -__a;
18979 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18980 vneg_f64 (float64x1_t __a)
18982 return -__a;
18985 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18986 vneg_s8 (int8x8_t __a)
18988 return -__a;
18991 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18992 vneg_s16 (int16x4_t __a)
18994 return -__a;
18997 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18998 vneg_s32 (int32x2_t __a)
19000 return -__a;
19003 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19004 vneg_s64 (int64x1_t __a)
19006 return -__a;
19009 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19010 vnegq_f32 (float32x4_t __a)
19012 return -__a;
19015 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19016 vnegq_f64 (float64x2_t __a)
19018 return -__a;
19021 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19022 vnegq_s8 (int8x16_t __a)
19024 return -__a;
19027 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19028 vnegq_s16 (int16x8_t __a)
19030 return -__a;
19033 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19034 vnegq_s32 (int32x4_t __a)
19036 return -__a;
19039 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19040 vnegq_s64 (int64x2_t __a)
19042 return -__a;
19045 /* vpadd */
19047 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19048 vpadd_s8 (int8x8_t __a, int8x8_t __b)
19050 return __builtin_aarch64_addpv8qi (__a, __b);
19053 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19054 vpadd_s16 (int16x4_t __a, int16x4_t __b)
19056 return __builtin_aarch64_addpv4hi (__a, __b);
19059 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19060 vpadd_s32 (int32x2_t __a, int32x2_t __b)
19062 return __builtin_aarch64_addpv2si (__a, __b);
19065 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19066 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
19068 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
19069 (int8x8_t) __b);
19072 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19073 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19075 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19076 (int16x4_t) __b);
19079 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19080 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19082 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19083 (int32x2_t) __b);
19086 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19087 vpaddd_f64 (float64x2_t __a)
19089 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
19092 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19093 vpaddd_s64 (int64x2_t __a)
19095 return __builtin_aarch64_addpdi (__a);
19098 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19099 vpaddd_u64 (uint64x2_t __a)
19101 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19104 /* vqabs */
19106 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19107 vqabsq_s64 (int64x2_t __a)
19109 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19112 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19113 vqabsb_s8 (int8_t __a)
19115 return (int8_t) __builtin_aarch64_sqabsqi (__a);
19118 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19119 vqabsh_s16 (int16_t __a)
19121 return (int16_t) __builtin_aarch64_sqabshi (__a);
19124 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19125 vqabss_s32 (int32_t __a)
19127 return (int32_t) __builtin_aarch64_sqabssi (__a);
19130 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19131 vqabsd_s64 (int64_t __a)
19133 return __builtin_aarch64_sqabsdi (__a);
/* vqadd (scalar forms): saturating addition.  Signed variants use the
   sqadd builtins, unsigned variants the uqadd ones.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqaddb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqaddh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqadds_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqaddd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqadddi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqaddb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqaddh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqadds_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqaddd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqadddi_uuu (__a, __b);
}
19186 /* vqdmlal */
19188 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19189 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19191 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
19194 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19195 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19197 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
19200 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19201 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19202 int const __d)
19204 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
19207 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19208 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19209 int const __d)
19211 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
19214 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19215 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19217 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
19220 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19221 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19223 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
19226 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19227 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19229 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
19232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19233 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19235 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
19238 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19239 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19241 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
19244 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19245 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19247 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
19250 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19251 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19252 int const __d)
19254 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
19257 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19258 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19259 int const __d)
19261 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
19264 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19265 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19267 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
19270 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19271 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19273 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
19276 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19277 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19279 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
19282 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19283 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19285 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
19288 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19289 vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
19291 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
19294 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19295 vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19297 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
19300 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19301 vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19303 return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
19306 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19307 vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
19309 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
19312 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19313 vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19315 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
19318 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19319 vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19321 return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
19324 /* vqdmlsl */
19326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19327 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19329 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
19332 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19333 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19335 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
19338 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19339 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19340 int const __d)
19342 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
19345 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19346 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19347 int const __d)
19349 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
19352 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19353 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19355 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
19358 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19359 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19361 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
19364 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19365 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19367 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
19370 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19371 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19373 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
19376 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19377 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19379 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
19382 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19383 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19385 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
19388 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19389 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19390 int const __d)
19392 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
19395 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19396 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19397 int const __d)
19399 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
19402 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19403 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19405 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
19408 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19409 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19411 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
19414 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19415 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19417 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
19420 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19421 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19423 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
19426 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19427 vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
19429 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
19432 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19433 vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
19435 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
19438 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19439 vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19441 return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
19444 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19445 vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
19447 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
19450 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19451 vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19453 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
19456 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19457 vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19459 return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
19462 /* vqdmulh */
19464 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19465 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19467 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
19470 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19471 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19473 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
19476 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19477 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19479 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
19482 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19483 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19485 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
19488 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19489 vqdmulhh_s16 (int16_t __a, int16_t __b)
19491 return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
19494 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19495 vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19497 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
19500 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19501 vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19503 return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
19506 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19507 vqdmulhs_s32 (int32_t __a, int32_t __b)
19509 return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
19512 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19513 vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19515 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
19518 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19519 vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19521 return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
19524 /* vqdmull */
19526 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19527 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
19529 return __builtin_aarch64_sqdmullv4hi (__a, __b);
19532 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19533 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
19535 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
19538 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19539 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
19541 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
19544 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19545 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
19547 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
19550 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19551 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
19553 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
19556 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19557 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
19559 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
19562 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19563 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
19565 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
19568 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19569 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
19571 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
19574 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19575 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
19577 return __builtin_aarch64_sqdmullv2si (__a, __b);
19580 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19581 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
19583 return __builtin_aarch64_sqdmull2v4si (__a, __b);
19586 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19587 vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
19589 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
19592 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19593 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
19595 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
19598 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19599 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
19601 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
19604 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19605 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
19607 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
19610 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19611 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
19613 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
19616 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19617 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
19619 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
19622 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19623 vqdmullh_s16 (int16_t __a, int16_t __b)
19625 return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
19628 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19629 vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19631 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
19634 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19635 vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19637 return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
19640 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19641 vqdmulls_s32 (int32_t __a, int32_t __b)
19643 return __builtin_aarch64_sqdmullsi (__a, __b);
19646 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19647 vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19649 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
19652 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19653 vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19655 return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
19658 /* vqmovn */
19660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19661 vqmovn_s16 (int16x8_t __a)
19663 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
19666 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19667 vqmovn_s32 (int32x4_t __a)
19669 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
19672 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19673 vqmovn_s64 (int64x2_t __a)
19675 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
19678 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19679 vqmovn_u16 (uint16x8_t __a)
19681 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
19684 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19685 vqmovn_u32 (uint32x4_t __a)
19687 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
19690 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19691 vqmovn_u64 (uint64x2_t __a)
19693 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
19696 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19697 vqmovnh_s16 (int16_t __a)
19699 return (int8_t) __builtin_aarch64_sqmovnhi (__a);
19702 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19703 vqmovns_s32 (int32_t __a)
19705 return (int16_t) __builtin_aarch64_sqmovnsi (__a);
19708 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19709 vqmovnd_s64 (int64_t __a)
19711 return (int32_t) __builtin_aarch64_sqmovndi (__a);
19714 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19715 vqmovnh_u16 (uint16_t __a)
19717 return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
19720 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19721 vqmovns_u32 (uint32_t __a)
19723 return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
19726 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19727 vqmovnd_u64 (uint64_t __a)
19729 return (uint32_t) __builtin_aarch64_uqmovndi (__a);
19732 /* vqmovun */
19734 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19735 vqmovun_s16 (int16x8_t __a)
19737 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
19740 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19741 vqmovun_s32 (int32x4_t __a)
19743 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
19746 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19747 vqmovun_s64 (int64x2_t __a)
19749 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
19752 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19753 vqmovunh_s16 (int16_t __a)
19755 return (int8_t) __builtin_aarch64_sqmovunhi (__a);
19758 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19759 vqmovuns_s32 (int32_t __a)
19761 return (int16_t) __builtin_aarch64_sqmovunsi (__a);
19764 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19765 vqmovund_s64 (int64_t __a)
19767 return (int32_t) __builtin_aarch64_sqmovundi (__a);
19770 /* vqneg */
19772 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19773 vqnegq_s64 (int64x2_t __a)
19775 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
19778 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19779 vqnegb_s8 (int8_t __a)
19781 return (int8_t) __builtin_aarch64_sqnegqi (__a);
19784 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19785 vqnegh_s16 (int16_t __a)
19787 return (int16_t) __builtin_aarch64_sqneghi (__a);
19790 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19791 vqnegs_s32 (int32_t __a)
19793 return (int32_t) __builtin_aarch64_sqnegsi (__a);
19796 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19797 vqnegd_s64 (int64_t __a)
19799 return __builtin_aarch64_sqnegdi (__a);
19802 /* vqrdmulh */
19804 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19805 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19807 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
19810 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19811 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19813 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
19816 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19817 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19819 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
19822 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19823 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19825 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
19828 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19829 vqrdmulhh_s16 (int16_t __a, int16_t __b)
19831 return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
19834 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19835 vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
19837 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
19840 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19841 vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19843 return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
19846 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19847 vqrdmulhs_s32 (int32_t __a, int32_t __b)
19849 return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
19852 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19853 vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
19855 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
19858 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19859 vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19861 return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
19864 /* vqrshl */
19866 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19867 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
19869 return __builtin_aarch64_sqrshlv8qi (__a, __b);
19872 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19873 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
19875 return __builtin_aarch64_sqrshlv4hi (__a, __b);
19878 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19879 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
19881 return __builtin_aarch64_sqrshlv2si (__a, __b);
19884 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19885 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
19887 return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
19890 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19891 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
19893 return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
19896 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19897 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
19899 return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
19902 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19903 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
19905 return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
19908 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19909 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
19911 return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
19914 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19915 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
19917 return __builtin_aarch64_sqrshlv16qi (__a, __b);
19920 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19921 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
19923 return __builtin_aarch64_sqrshlv8hi (__a, __b);
19926 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19927 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
19929 return __builtin_aarch64_sqrshlv4si (__a, __b);
19932 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19933 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
19935 return __builtin_aarch64_sqrshlv2di (__a, __b);
19938 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19939 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
19941 return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
19944 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19945 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
19947 return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
19950 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19951 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
19953 return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
19956 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19957 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
19959 return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
19962 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19963 vqrshlb_s8 (int8_t __a, int8_t __b)
19965 return __builtin_aarch64_sqrshlqi (__a, __b);
19968 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19969 vqrshlh_s16 (int16_t __a, int16_t __b)
19971 return __builtin_aarch64_sqrshlhi (__a, __b);
19974 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19975 vqrshls_s32 (int32_t __a, int32_t __b)
19977 return __builtin_aarch64_sqrshlsi (__a, __b);
19980 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19981 vqrshld_s64 (int64_t __a, int64_t __b)
19983 return __builtin_aarch64_sqrshldi (__a, __b);
19986 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19987 vqrshlb_u8 (uint8_t __a, uint8_t __b)
19989 return __builtin_aarch64_uqrshlqi_uus (__a, __b);
19992 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19993 vqrshlh_u16 (uint16_t __a, uint16_t __b)
19995 return __builtin_aarch64_uqrshlhi_uus (__a, __b);
19998 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19999 vqrshls_u32 (uint32_t __a, uint32_t __b)
20001 return __builtin_aarch64_uqrshlsi_uus (__a, __b);
20004 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20005 vqrshld_u64 (uint64_t __a, uint64_t __b)
20007 return __builtin_aarch64_uqrshldi_uus (__a, __b);
20010 /* vqrshrn */
20012 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20013 vqrshrn_n_s16 (int16x8_t __a, const int __b)
20015 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
20018 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20019 vqrshrn_n_s32 (int32x4_t __a, const int __b)
20021 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
20024 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20025 vqrshrn_n_s64 (int64x2_t __a, const int __b)
20027 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
20030 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20031 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
20033 return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
20036 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20037 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
20039 return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
20042 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20043 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
20045 return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
20048 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20049 vqrshrnh_n_s16 (int16_t __a, const int __b)
20051 return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
20054 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20055 vqrshrns_n_s32 (int32_t __a, const int __b)
20057 return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
20060 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20061 vqrshrnd_n_s64 (int64_t __a, const int __b)
20063 return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
20066 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20067 vqrshrnh_n_u16 (uint16_t __a, const int __b)
20069 return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
20072 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20073 vqrshrns_n_u32 (uint32_t __a, const int __b)
20075 return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
20078 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20079 vqrshrnd_n_u64 (uint64_t __a, const int __b)
20081 return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
20084 /* vqrshrun */
20086 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20087 vqrshrun_n_s16 (int16x8_t __a, const int __b)
20089 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
20092 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20093 vqrshrun_n_s32 (int32x4_t __a, const int __b)
20095 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
20098 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20099 vqrshrun_n_s64 (int64x2_t __a, const int __b)
20101 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
20104 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20105 vqrshrunh_n_s16 (int16_t __a, const int __b)
20107 return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
20110 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20111 vqrshruns_n_s32 (int32_t __a, const int __b)
20113 return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
20116 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20117 vqrshrund_n_s64 (int64_t __a, const int __b)
20119 return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
20122 /* vqshl */
20124 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20125 vqshl_s8 (int8x8_t __a, int8x8_t __b)
20127 return __builtin_aarch64_sqshlv8qi (__a, __b);
20130 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20131 vqshl_s16 (int16x4_t __a, int16x4_t __b)
20133 return __builtin_aarch64_sqshlv4hi (__a, __b);
20136 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20137 vqshl_s32 (int32x2_t __a, int32x2_t __b)
20139 return __builtin_aarch64_sqshlv2si (__a, __b);
20142 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20143 vqshl_s64 (int64x1_t __a, int64x1_t __b)
20145 return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
20148 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20149 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
20151 return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
20154 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20155 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
20157 return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
20160 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20161 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
20163 return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
20166 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20167 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
20169 return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
20172 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20173 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
20175 return __builtin_aarch64_sqshlv16qi (__a, __b);
20178 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20179 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
20181 return __builtin_aarch64_sqshlv8hi (__a, __b);
20184 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20185 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
20187 return __builtin_aarch64_sqshlv4si (__a, __b);
20190 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20191 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
20193 return __builtin_aarch64_sqshlv2di (__a, __b);
20196 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20197 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
20199 return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
20202 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20203 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
20205 return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
20208 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20209 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
20211 return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
20214 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20215 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
20217 return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
20220 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20221 vqshlb_s8 (int8_t __a, int8_t __b)
20223 return __builtin_aarch64_sqshlqi (__a, __b);
20226 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20227 vqshlh_s16 (int16_t __a, int16_t __b)
20229 return __builtin_aarch64_sqshlhi (__a, __b);
20232 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20233 vqshls_s32 (int32_t __a, int32_t __b)
20235 return __builtin_aarch64_sqshlsi (__a, __b);
20238 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20239 vqshld_s64 (int64_t __a, int64_t __b)
20241 return __builtin_aarch64_sqshldi (__a, __b);
20244 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20245 vqshlb_u8 (uint8_t __a, uint8_t __b)
20247 return __builtin_aarch64_uqshlqi_uus (__a, __b);
20250 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20251 vqshlh_u16 (uint16_t __a, uint16_t __b)
20253 return __builtin_aarch64_uqshlhi_uus (__a, __b);
20256 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20257 vqshls_u32 (uint32_t __a, uint32_t __b)
20259 return __builtin_aarch64_uqshlsi_uus (__a, __b);
20262 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20263 vqshld_u64 (uint64_t __a, uint64_t __b)
20265 return __builtin_aarch64_uqshldi_uus (__a, __b);
20268 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20269 vqshl_n_s8 (int8x8_t __a, const int __b)
20271 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
20274 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20275 vqshl_n_s16 (int16x4_t __a, const int __b)
20277 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
20280 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20281 vqshl_n_s32 (int32x2_t __a, const int __b)
20283 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
20286 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20287 vqshl_n_s64 (int64x1_t __a, const int __b)
20289 return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
20292 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20293 vqshl_n_u8 (uint8x8_t __a, const int __b)
20295 return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
20298 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20299 vqshl_n_u16 (uint16x4_t __a, const int __b)
20301 return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
20304 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20305 vqshl_n_u32 (uint32x2_t __a, const int __b)
20307 return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
20310 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20311 vqshl_n_u64 (uint64x1_t __a, const int __b)
20313 return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
20316 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20317 vqshlq_n_s8 (int8x16_t __a, const int __b)
20319 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
20322 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20323 vqshlq_n_s16 (int16x8_t __a, const int __b)
20325 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
20328 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20329 vqshlq_n_s32 (int32x4_t __a, const int __b)
20331 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
20334 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20335 vqshlq_n_s64 (int64x2_t __a, const int __b)
20337 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
20340 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20341 vqshlq_n_u8 (uint8x16_t __a, const int __b)
20343 return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
20346 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20347 vqshlq_n_u16 (uint16x8_t __a, const int __b)
20349 return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
20352 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20353 vqshlq_n_u32 (uint32x4_t __a, const int __b)
20355 return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
20358 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20359 vqshlq_n_u64 (uint64x2_t __a, const int __b)
20361 return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
20364 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20365 vqshlb_n_s8 (int8_t __a, const int __b)
20367 return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
20370 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20371 vqshlh_n_s16 (int16_t __a, const int __b)
20373 return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
20376 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20377 vqshls_n_s32 (int32_t __a, const int __b)
20379 return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
20382 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20383 vqshld_n_s64 (int64_t __a, const int __b)
20385 return __builtin_aarch64_sqshl_ndi (__a, __b);
20388 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20389 vqshlb_n_u8 (uint8_t __a, const int __b)
20391 return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
20394 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20395 vqshlh_n_u16 (uint16_t __a, const int __b)
20397 return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
20400 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20401 vqshls_n_u32 (uint32_t __a, const int __b)
20403 return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
20406 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20407 vqshld_n_u64 (uint64_t __a, const int __b)
20409 return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
20412 /* vqshlu */
20414 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20415 vqshlu_n_s8 (int8x8_t __a, const int __b)
20417 return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
20420 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20421 vqshlu_n_s16 (int16x4_t __a, const int __b)
20423 return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
20426 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20427 vqshlu_n_s32 (int32x2_t __a, const int __b)
20429 return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
20432 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20433 vqshlu_n_s64 (int64x1_t __a, const int __b)
20435 return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
20438 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20439 vqshluq_n_s8 (int8x16_t __a, const int __b)
20441 return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
20444 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20445 vqshluq_n_s16 (int16x8_t __a, const int __b)
20447 return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
20450 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20451 vqshluq_n_s32 (int32x4_t __a, const int __b)
20453 return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
20456 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20457 vqshluq_n_s64 (int64x2_t __a, const int __b)
20459 return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
20462 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20463 vqshlub_n_s8 (int8_t __a, const int __b)
20465 return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
20468 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20469 vqshluh_n_s16 (int16_t __a, const int __b)
20471 return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
20474 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20475 vqshlus_n_s32 (int32_t __a, const int __b)
20477 return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
20480 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20481 vqshlud_n_s64 (int64_t __a, const int __b)
20483 return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
20486 /* vqshrn */
20488 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20489 vqshrn_n_s16 (int16x8_t __a, const int __b)
20491 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
20494 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20495 vqshrn_n_s32 (int32x4_t __a, const int __b)
20497 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
20500 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20501 vqshrn_n_s64 (int64x2_t __a, const int __b)
20503 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
20506 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20507 vqshrn_n_u16 (uint16x8_t __a, const int __b)
20509 return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
20512 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20513 vqshrn_n_u32 (uint32x4_t __a, const int __b)
20515 return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
20518 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20519 vqshrn_n_u64 (uint64x2_t __a, const int __b)
20521 return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
20524 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20525 vqshrnh_n_s16 (int16_t __a, const int __b)
20527 return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
20530 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20531 vqshrns_n_s32 (int32_t __a, const int __b)
20533 return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
20536 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20537 vqshrnd_n_s64 (int64_t __a, const int __b)
20539 return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
20542 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20543 vqshrnh_n_u16 (uint16_t __a, const int __b)
20545 return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
20548 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20549 vqshrns_n_u32 (uint32_t __a, const int __b)
20551 return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
20554 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20555 vqshrnd_n_u64 (uint64_t __a, const int __b)
20557 return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
20560 /* vqshrun */
20562 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20563 vqshrun_n_s16 (int16x8_t __a, const int __b)
20565 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
20568 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20569 vqshrun_n_s32 (int32x4_t __a, const int __b)
20571 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
20574 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20575 vqshrun_n_s64 (int64x2_t __a, const int __b)
20577 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
20580 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20581 vqshrunh_n_s16 (int16_t __a, const int __b)
20583 return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
20586 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20587 vqshruns_n_s32 (int32_t __a, const int __b)
20589 return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
20592 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20593 vqshrund_n_s64 (int64_t __a, const int __b)
20595 return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
20598 /* vqsub */
20600 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20601 vqsubb_s8 (int8_t __a, int8_t __b)
20603 return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
20606 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20607 vqsubh_s16 (int16_t __a, int16_t __b)
20609 return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
20612 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20613 vqsubs_s32 (int32_t __a, int32_t __b)
20615 return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
20618 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
20619 vqsubd_s64 (int64_t __a, int64_t __b)
20621 return __builtin_aarch64_sqsubdi (__a, __b);
20624 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20625 vqsubb_u8 (uint8_t __a, uint8_t __b)
20627 return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
20630 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20631 vqsubh_u16 (uint16_t __a, uint16_t __b)
20633 return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
20636 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20637 vqsubs_u32 (uint32_t __a, uint32_t __b)
20639 return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
20642 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
20643 vqsubd_u64 (uint64_t __a, uint64_t __b)
20645 return __builtin_aarch64_uqsubdi_uuu (__a, __b);
20648 /* vrbit */
20650 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20651 vrbit_p8 (poly8x8_t __a)
20653 return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20656 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20657 vrbit_s8 (int8x8_t __a)
20659 return __builtin_aarch64_rbitv8qi (__a);
20662 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20663 vrbit_u8 (uint8x8_t __a)
20665 return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20668 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20669 vrbitq_p8 (poly8x16_t __a)
20671 return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
20674 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20675 vrbitq_s8 (int8x16_t __a)
20677 return __builtin_aarch64_rbitv16qi (__a);
20680 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20681 vrbitq_u8 (uint8x16_t __a)
20683 return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
20686 /* vrecpe */
20688 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20689 vrecpe_u32 (uint32x2_t __a)
20691 return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a);
20694 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20695 vrecpeq_u32 (uint32x4_t __a)
20697 return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a);
20700 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20701 vrecpes_f32 (float32_t __a)
20703 return __builtin_aarch64_frecpesf (__a);
20706 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20707 vrecped_f64 (float64_t __a)
20709 return __builtin_aarch64_frecpedf (__a);
20712 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20713 vrecpe_f32 (float32x2_t __a)
20715 return __builtin_aarch64_frecpev2sf (__a);
20718 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20719 vrecpeq_f32 (float32x4_t __a)
20721 return __builtin_aarch64_frecpev4sf (__a);
20724 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20725 vrecpeq_f64 (float64x2_t __a)
20727 return __builtin_aarch64_frecpev2df (__a);
20730 /* vrecps */
20732 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20733 vrecpss_f32 (float32_t __a, float32_t __b)
20735 return __builtin_aarch64_frecpssf (__a, __b);
20738 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20739 vrecpsd_f64 (float64_t __a, float64_t __b)
20741 return __builtin_aarch64_frecpsdf (__a, __b);
20744 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20745 vrecps_f32 (float32x2_t __a, float32x2_t __b)
20747 return __builtin_aarch64_frecpsv2sf (__a, __b);
20750 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20751 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
20753 return __builtin_aarch64_frecpsv4sf (__a, __b);
20756 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20757 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
20759 return __builtin_aarch64_frecpsv2df (__a, __b);
20762 /* vrecpx */
20764 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20765 vrecpxs_f32 (float32_t __a)
20767 return __builtin_aarch64_frecpxsf (__a);
20770 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20771 vrecpxd_f64 (float64_t __a)
20773 return __builtin_aarch64_frecpxdf (__a);
20777 /* vrev */
20779 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20780 vrev16_p8 (poly8x8_t a)
20782 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20785 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20786 vrev16_s8 (int8x8_t a)
20788 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20791 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20792 vrev16_u8 (uint8x8_t a)
20794 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20797 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20798 vrev16q_p8 (poly8x16_t a)
20800 return __builtin_shuffle (a,
20801 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20804 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20805 vrev16q_s8 (int8x16_t a)
20807 return __builtin_shuffle (a,
20808 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20811 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20812 vrev16q_u8 (uint8x16_t a)
20814 return __builtin_shuffle (a,
20815 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20818 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20819 vrev32_p8 (poly8x8_t a)
20821 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20824 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20825 vrev32_p16 (poly16x4_t a)
20827 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20830 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20831 vrev32_s8 (int8x8_t a)
20833 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20836 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20837 vrev32_s16 (int16x4_t a)
20839 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20842 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20843 vrev32_u8 (uint8x8_t a)
20845 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20848 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20849 vrev32_u16 (uint16x4_t a)
20851 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20854 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20855 vrev32q_p8 (poly8x16_t a)
20857 return __builtin_shuffle (a,
20858 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20861 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20862 vrev32q_p16 (poly16x8_t a)
20864 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20867 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20868 vrev32q_s8 (int8x16_t a)
20870 return __builtin_shuffle (a,
20871 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20874 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20875 vrev32q_s16 (int16x8_t a)
20877 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20880 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20881 vrev32q_u8 (uint8x16_t a)
20883 return __builtin_shuffle (a,
20884 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20887 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20888 vrev32q_u16 (uint16x8_t a)
20890 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20893 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20894 vrev64_f32 (float32x2_t a)
20896 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20899 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20900 vrev64_p8 (poly8x8_t a)
20902 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20905 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20906 vrev64_p16 (poly16x4_t a)
20908 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20911 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20912 vrev64_s8 (int8x8_t a)
20914 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20917 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20918 vrev64_s16 (int16x4_t a)
20920 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20923 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20924 vrev64_s32 (int32x2_t a)
20926 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20929 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20930 vrev64_u8 (uint8x8_t a)
20932 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20935 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20936 vrev64_u16 (uint16x4_t a)
20938 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20941 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20942 vrev64_u32 (uint32x2_t a)
20944 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20947 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20948 vrev64q_f32 (float32x4_t a)
20950 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
20953 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20954 vrev64q_p8 (poly8x16_t a)
20956 return __builtin_shuffle (a,
20957 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20960 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20961 vrev64q_p16 (poly16x8_t a)
20963 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20966 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20967 vrev64q_s8 (int8x16_t a)
20969 return __builtin_shuffle (a,
20970 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20973 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20974 vrev64q_s16 (int16x8_t a)
20976 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20979 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20980 vrev64q_s32 (int32x4_t a)
20982 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
20985 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20986 vrev64q_u8 (uint8x16_t a)
20988 return __builtin_shuffle (a,
20989 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
20992 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20993 vrev64q_u16 (uint16x8_t a)
20995 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20998 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20999 vrev64q_u32 (uint32x4_t a)
21001 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21004 /* vrnd */
21006 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21007 vrnd_f32 (float32x2_t __a)
21009 return __builtin_aarch64_btruncv2sf (__a);
21012 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21013 vrnd_f64 (float64x1_t __a)
21015 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21018 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21019 vrndq_f32 (float32x4_t __a)
21021 return __builtin_aarch64_btruncv4sf (__a);
21024 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21025 vrndq_f64 (float64x2_t __a)
21027 return __builtin_aarch64_btruncv2df (__a);
21030 /* vrnda */
21032 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21033 vrnda_f32 (float32x2_t __a)
21035 return __builtin_aarch64_roundv2sf (__a);
21038 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21039 vrnda_f64 (float64x1_t __a)
21041 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21044 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21045 vrndaq_f32 (float32x4_t __a)
21047 return __builtin_aarch64_roundv4sf (__a);
21050 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21051 vrndaq_f64 (float64x2_t __a)
21053 return __builtin_aarch64_roundv2df (__a);
21056 /* vrndi */
21058 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21059 vrndi_f32 (float32x2_t __a)
21061 return __builtin_aarch64_nearbyintv2sf (__a);
21064 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21065 vrndi_f64 (float64x1_t __a)
21067 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21070 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21071 vrndiq_f32 (float32x4_t __a)
21073 return __builtin_aarch64_nearbyintv4sf (__a);
21076 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21077 vrndiq_f64 (float64x2_t __a)
21079 return __builtin_aarch64_nearbyintv2df (__a);
21082 /* vrndm */
21084 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21085 vrndm_f32 (float32x2_t __a)
21087 return __builtin_aarch64_floorv2sf (__a);
21090 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21091 vrndm_f64 (float64x1_t __a)
21093 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21096 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21097 vrndmq_f32 (float32x4_t __a)
21099 return __builtin_aarch64_floorv4sf (__a);
21102 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21103 vrndmq_f64 (float64x2_t __a)
21105 return __builtin_aarch64_floorv2df (__a);
21108 /* vrndn */
21110 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21111 vrndn_f32 (float32x2_t __a)
21113 return __builtin_aarch64_frintnv2sf (__a);
21116 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21117 vrndn_f64 (float64x1_t __a)
21119 return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
21122 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21123 vrndnq_f32 (float32x4_t __a)
21125 return __builtin_aarch64_frintnv4sf (__a);
21128 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21129 vrndnq_f64 (float64x2_t __a)
21131 return __builtin_aarch64_frintnv2df (__a);
21134 /* vrndp */
21136 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21137 vrndp_f32 (float32x2_t __a)
21139 return __builtin_aarch64_ceilv2sf (__a);
21142 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21143 vrndp_f64 (float64x1_t __a)
21145 return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
21148 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21149 vrndpq_f32 (float32x4_t __a)
21151 return __builtin_aarch64_ceilv4sf (__a);
21154 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21155 vrndpq_f64 (float64x2_t __a)
21157 return __builtin_aarch64_ceilv2df (__a);
21160 /* vrndx */
21162 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21163 vrndx_f32 (float32x2_t __a)
21165 return __builtin_aarch64_rintv2sf (__a);
21168 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21169 vrndx_f64 (float64x1_t __a)
21171 return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
21174 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21175 vrndxq_f32 (float32x4_t __a)
21177 return __builtin_aarch64_rintv4sf (__a);
21180 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21181 vrndxq_f64 (float64x2_t __a)
21183 return __builtin_aarch64_rintv2df (__a);
21186 /* vrshl */
21188 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21189 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21191 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21194 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21195 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21197 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21200 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21201 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21203 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21206 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21207 vrshl_s64 (int64x1_t __a, int64x1_t __b)
21209 return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
21212 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21213 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
21215 return __builtin_aarch64_urshlv8qi_uus (__a, __b);
21218 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21219 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
21221 return __builtin_aarch64_urshlv4hi_uus (__a, __b);
21224 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21225 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
21227 return __builtin_aarch64_urshlv2si_uus (__a, __b);
21230 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21231 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
21233 return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
21236 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21237 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
21239 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
21242 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21243 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
21245 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
21248 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21249 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
21251 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
21254 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21255 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
21257 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
21260 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21261 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21263 return __builtin_aarch64_urshlv16qi_uus (__a, __b);
21266 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21267 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21269 return __builtin_aarch64_urshlv8hi_uus (__a, __b);
21272 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21273 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21275 return __builtin_aarch64_urshlv4si_uus (__a, __b);
21278 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21279 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21281 return __builtin_aarch64_urshlv2di_uus (__a, __b);
21284 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21285 vrshld_s64 (int64_t __a, int64_t __b)
21287 return __builtin_aarch64_srshldi (__a, __b);
21290 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21291 vrshld_u64 (uint64_t __a, int64_t __b)
21293 return __builtin_aarch64_urshldi_uus (__a, __b);
21296 /* vrshr */
21298 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21299 vrshr_n_s8 (int8x8_t __a, const int __b)
21301 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
21304 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21305 vrshr_n_s16 (int16x4_t __a, const int __b)
21307 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
21310 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21311 vrshr_n_s32 (int32x2_t __a, const int __b)
21313 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
21316 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21317 vrshr_n_s64 (int64x1_t __a, const int __b)
21319 return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
21322 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21323 vrshr_n_u8 (uint8x8_t __a, const int __b)
21325 return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
21328 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21329 vrshr_n_u16 (uint16x4_t __a, const int __b)
21331 return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
21334 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21335 vrshr_n_u32 (uint32x2_t __a, const int __b)
21337 return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
21340 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21341 vrshr_n_u64 (uint64x1_t __a, const int __b)
21343 return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
21346 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21347 vrshrq_n_s8 (int8x16_t __a, const int __b)
21349 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
21352 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21353 vrshrq_n_s16 (int16x8_t __a, const int __b)
21355 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
21358 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21359 vrshrq_n_s32 (int32x4_t __a, const int __b)
21361 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
21364 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21365 vrshrq_n_s64 (int64x2_t __a, const int __b)
21367 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
21370 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21371 vrshrq_n_u8 (uint8x16_t __a, const int __b)
21373 return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
21376 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21377 vrshrq_n_u16 (uint16x8_t __a, const int __b)
21379 return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
21382 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21383 vrshrq_n_u32 (uint32x4_t __a, const int __b)
21385 return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
21388 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21389 vrshrq_n_u64 (uint64x2_t __a, const int __b)
21391 return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
21394 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21395 vrshrd_n_s64 (int64_t __a, const int __b)
21397 return __builtin_aarch64_srshr_ndi (__a, __b);
21400 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21401 vrshrd_n_u64 (uint64_t __a, const int __b)
21403 return __builtin_aarch64_urshr_ndi_uus (__a, __b);
21406 /* vrsra */
21408 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21409 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
21411 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
21414 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21415 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21417 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
21420 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21421 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21423 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
21426 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21427 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
21429 return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
21432 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21433 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
21435 return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
21438 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21439 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
21441 return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
21444 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21445 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
21447 return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
21450 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21451 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
21453 return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
21456 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21457 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
21459 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
21462 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21463 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
21465 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
21468 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21469 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
21471 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
21474 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21475 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
21477 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
21480 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21481 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
21483 return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
21486 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21487 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
21489 return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
21492 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21493 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
21495 return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
21498 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21499 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
21501 return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
21504 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21505 vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
21507 return __builtin_aarch64_srsra_ndi (__a, __b, __c);
21510 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21511 vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
21513 return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
#ifdef __ARM_FEATURE_CRYPTO

/* SHA-1 / SHA-256 hash-update and message-schedule intrinsics, plus the
   64x64 -> 128 polynomial multiplies.  Only available when the target has
   the Crypto extension.  */

/* vsha1 */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
21593 /* vshl */
21595 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21596 vshl_n_s8 (int8x8_t __a, const int __b)
21598 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
21601 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21602 vshl_n_s16 (int16x4_t __a, const int __b)
21604 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
21607 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21608 vshl_n_s32 (int32x2_t __a, const int __b)
21610 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
21613 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21614 vshl_n_s64 (int64x1_t __a, const int __b)
21616 return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
21619 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21620 vshl_n_u8 (uint8x8_t __a, const int __b)
21622 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
21625 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21626 vshl_n_u16 (uint16x4_t __a, const int __b)
21628 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
21631 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21632 vshl_n_u32 (uint32x2_t __a, const int __b)
21634 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
21637 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21638 vshl_n_u64 (uint64x1_t __a, const int __b)
21640 return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
21643 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21644 vshlq_n_s8 (int8x16_t __a, const int __b)
21646 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
21649 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21650 vshlq_n_s16 (int16x8_t __a, const int __b)
21652 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
21655 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21656 vshlq_n_s32 (int32x4_t __a, const int __b)
21658 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
21661 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21662 vshlq_n_s64 (int64x2_t __a, const int __b)
21664 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
21667 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21668 vshlq_n_u8 (uint8x16_t __a, const int __b)
21670 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
21673 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21674 vshlq_n_u16 (uint16x8_t __a, const int __b)
21676 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
21679 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21680 vshlq_n_u32 (uint32x4_t __a, const int __b)
21682 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
21685 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21686 vshlq_n_u64 (uint64x2_t __a, const int __b)
21688 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
21691 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21692 vshld_n_s64 (int64_t __a, const int __b)
21694 return __builtin_aarch64_ashldi (__a, __b);
21697 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21698 vshld_n_u64 (uint64_t __a, const int __b)
21700 return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
/* vshl (register form): per-lane shift of __a by the signed counts held
   in vector __b.  Unsigned-data variants use the "_uus" builtins, which
   take unsigned data and signed shift counts.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sshlv2si (__a, __b);
}

/* Single-lane 64-bit case: extract lane 0, use the DI-mode builtin and
   rebuild the one-element vector with a compound literal.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_ushlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
}
/* vshlq (register form, 128-bit): per-lane shift of __a by the signed
   counts held in vector __b.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sshlv2di (__a, __b);
}

/* Unsigned data, signed per-lane counts ("_uus" builtin signature).  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_ushlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_ushlv2di_uus (__a, __b);
}
/* vshld (scalar register form): shift a single 64-bit value by a
   64-bit count held in a register.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_ushldi_uus (__a, __b);
}
/* vshll_high_n: widening left shift of the HIGH half of __a by the
   immediate __b ("shll2" builtins).  Unsigned forms cast through the
   signed vector types expected by the builtin.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}
/* vshll_n: widening left shift of a 64-bit vector by the immediate __b;
   each result lane is twice as wide as the source lane.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
}
/* vshr */

/* vshr_n: right shift by immediate __b.  Signed lanes use the
   arithmetic-shift ("ashr") builtins, unsigned lanes the logical-shift
   ("lshr") builtins via casts.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

/* The 64x1 forms go through the "simddi" scalar builtins on lane 0.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
}
/* vshrq_n: 128-bit right shift by immediate; arithmetic builtins for
   signed lanes, logical builtins (with casts) for unsigned lanes.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}
/* vshrd_n: scalar 64-bit right shift by immediate __b (arithmetic for
   signed, logical for unsigned).  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}
/* vsli */

/* vsli_n: shift __b left by immediate __c and insert into __a ("sli"
   builtins); the 64x1 forms operate on lane 0 via the DI builtins.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}
/* vsliq_n / vslid_n: 128-bit and scalar shift-left-and-insert forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}

/* Scalar 64-bit shift-left-and-insert.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
}
/* vsqadd */

/* vsqadd: unsigned saturating add of a signed operand __b to unsigned
   __a ("usqadd" builtins; unsigned result, unsigned+signed inputs).  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_usqaddv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_usqaddv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_usqaddv2di_uus (__a, __b);
}

/* Scalar forms, one per element width.  */

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vsqaddb_u8 (uint8_t __a, int8_t __b)
{
  return __builtin_aarch64_usqaddqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vsqaddh_u16 (uint16_t __a, int16_t __b)
{
  return __builtin_aarch64_usqaddhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vsqadds_u32 (uint32_t __a, int32_t __b)
{
  return __builtin_aarch64_usqaddsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsqaddd_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_usqadddi_uus (__a, __b);
}
/* vsqrt */

/* vsqrt: per-lane floating-point square root.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsqrt_f32 (float32x2_t a)
{
  return __builtin_aarch64_sqrtv2sf (a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsqrtq_f32 (float32x4_t a)
{
  return __builtin_aarch64_sqrtv4sf (a);
}

/* Single-lane f64: use the generic scalar sqrt builtin on lane 0.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsqrt_f64 (float64x1_t a)
{
  return (float64x1_t) { __builtin_sqrt (a[0]) };
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsqrtq_f64 (float64x2_t a)
{
  return __builtin_aarch64_sqrtv2df (a);
}
/* vsra */

/* vsra_n: shift __b right by immediate __c and accumulate into __a
   ("ssra"/"usra" builtins).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
}
/* vsraq_n / vsrad_n: 128-bit and scalar shift-right-and-accumulate.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
}

/* Scalar 64-bit forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
}
/* vsri */

/* vsri_n: shift __b right by immediate __c and insert into __a
   ("sri" builtins).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
}
/* vsriq_n / vsrid_n: 128-bit and scalar shift-right-and-insert.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
}

/* Scalar 64-bit forms.  */

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssri_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
}
/* vst1 */

/* vst1: store one 64-bit vector to memory at *a.  Poly/unsigned lanes
   are cast to the signed vector types the st1 builtins expect; the
   single-lane 64-bit forms are a plain scalar store of lane 0.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f32 (float32_t *a, float32x2_t b)
{
  __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_f64 (float64_t *a, float64x1_t b)
{
  *a = b[0];
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p8 (poly8_t *a, poly8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_p16 (poly16_t *a, poly16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s8 (int8_t *a, int8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s16 (int16_t *a, int16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s32 (int32_t *a, int32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_s64 (int64_t *a, int64x1_t b)
{
  *a = b[0];
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u8 (uint8_t *a, uint8x8_t b)
{
  __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
			     (int8x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u16 (uint16_t *a, uint16x4_t b)
{
  __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u32 (uint32_t *a, uint32x2_t b)
{
  __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
			     (int32x2_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1_u64 (uint64_t *a, uint64x1_t b)
{
  *a = b[0];
}
/* vst1q: store one 128-bit vector to memory at *a; poly/unsigned lanes
   are cast to the signed vector types the st1 builtins expect.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f32 (float32_t *a, float32x4_t b)
{
  __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_f64 (float64_t *a, float64x2_t b)
{
  __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
}

/* vst1q */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p8 (poly8_t *a, poly8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
			      (int8x16_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_p16 (poly16_t *a, poly16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s8 (int8_t *a, int8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s16 (int16_t *a, int16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s32 (int32_t *a, int32x4_t b)
{
  __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_s64 (int64_t *a, int64x2_t b)
{
  __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u8 (uint8_t *a, uint8x16_t b)
{
  __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
			      (int8x16_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u16 (uint16_t *a, uint16x8_t b)
{
  __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
			     (int16x8_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u32 (uint32_t *a, uint32x4_t b)
{
  __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
			     (int32x4_t) b);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst1q_u64 (uint64_t *a, uint64x2_t b)
{
  __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
			     (int64x2_t) b);
}
22581 /* vstn */
22583 __extension__ static __inline void
22584 vst2_s64 (int64_t * __a, int64x1x2_t val)
22586 __builtin_aarch64_simd_oi __o;
22587 int64x2x2_t temp;
22588 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22589 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22590 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22591 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22592 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22595 __extension__ static __inline void
22596 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
22598 __builtin_aarch64_simd_oi __o;
22599 uint64x2x2_t temp;
22600 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22601 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22602 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22603 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22604 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22607 __extension__ static __inline void
22608 vst2_f64 (float64_t * __a, float64x1x2_t val)
22610 __builtin_aarch64_simd_oi __o;
22611 float64x2x2_t temp;
22612 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22613 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22614 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
22615 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
22616 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
22619 __extension__ static __inline void
22620 vst2_s8 (int8_t * __a, int8x8x2_t val)
22622 __builtin_aarch64_simd_oi __o;
22623 int8x16x2_t temp;
22624 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22625 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22626 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22627 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22628 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* vst2_p8 / vst2_s16 / vst2_p16: interleaved two-vector stores of
   64-bit vectors.  Each input D-vector is widened into a Q-sized
   temporary (upper half zeroed via vcreate) solely to build the
   register-tuple argument of the st2 builtin.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p8 (poly8_t * __a, poly8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s16 (int16_t * __a, int16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int16x8x2_t temp;
  temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_p16 (poly16_t * __a, poly16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  poly16x8x2_t temp;
  temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}
/* vst2_s32 / vst2_u8 / vst2_u16 / vst2_u32: interleaved two-vector
   stores of 64-bit vectors; same Q-register widening pattern as the
   other vst2 variants.  */

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_s32 (int32_t * __a, int32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  int32x4x2_t temp;
  temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
  temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u8 (uint8_t * __a, uint8x8x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
  __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u16 (uint16_t * __a, uint16x4x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint16x8x2_t temp;
  temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
  __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst2_u32 (uint32_t * __a, uint32x2x2_t val)
{
  __builtin_aarch64_simd_oi __o;
  uint32x4x2_t temp;
  temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
  __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
}
22715 __extension__ static __inline void __attribute__ ((__always_inline__))
22716 vst2_f32 (float32_t * __a, float32x2x2_t val)
22718 __builtin_aarch64_simd_oi __o;
22719 float32x4x2_t temp;
22720 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22721 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22722 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
22723 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
22724 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
22727 __extension__ static __inline void __attribute__ ((__always_inline__))
22728 vst2q_s8 (int8_t * __a, int8x16x2_t val)
22730 __builtin_aarch64_simd_oi __o;
22731 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22732 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22733 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22736 __extension__ static __inline void __attribute__ ((__always_inline__))
22737 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
22739 __builtin_aarch64_simd_oi __o;
22740 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22741 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22742 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22745 __extension__ static __inline void __attribute__ ((__always_inline__))
22746 vst2q_s16 (int16_t * __a, int16x8x2_t val)
22748 __builtin_aarch64_simd_oi __o;
22749 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22750 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22751 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22754 __extension__ static __inline void __attribute__ ((__always_inline__))
22755 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
22757 __builtin_aarch64_simd_oi __o;
22758 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22759 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22760 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22763 __extension__ static __inline void __attribute__ ((__always_inline__))
22764 vst2q_s32 (int32_t * __a, int32x4x2_t val)
22766 __builtin_aarch64_simd_oi __o;
22767 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22768 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22769 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22772 __extension__ static __inline void __attribute__ ((__always_inline__))
22773 vst2q_s64 (int64_t * __a, int64x2x2_t val)
22775 __builtin_aarch64_simd_oi __o;
22776 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22777 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22778 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22781 __extension__ static __inline void __attribute__ ((__always_inline__))
22782 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
22784 __builtin_aarch64_simd_oi __o;
22785 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22786 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22787 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22790 __extension__ static __inline void __attribute__ ((__always_inline__))
22791 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
22793 __builtin_aarch64_simd_oi __o;
22794 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22795 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22796 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22799 __extension__ static __inline void __attribute__ ((__always_inline__))
22800 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
22802 __builtin_aarch64_simd_oi __o;
22803 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22804 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22805 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22808 __extension__ static __inline void __attribute__ ((__always_inline__))
22809 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
22811 __builtin_aarch64_simd_oi __o;
22812 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22813 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22814 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22817 __extension__ static __inline void __attribute__ ((__always_inline__))
22818 vst2q_f32 (float32_t * __a, float32x4x2_t val)
22820 __builtin_aarch64_simd_oi __o;
22821 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
22822 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
22823 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
22826 __extension__ static __inline void __attribute__ ((__always_inline__))
22827 vst2q_f64 (float64_t * __a, float64x2x2_t val)
22829 __builtin_aarch64_simd_oi __o;
22830 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
22831 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
22832 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
22835 __extension__ static __inline void
22836 vst3_s64 (int64_t * __a, int64x1x3_t val)
22838 __builtin_aarch64_simd_ci __o;
22839 int64x2x3_t temp;
22840 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22841 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22842 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
22843 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22844 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22845 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22846 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22849 __extension__ static __inline void
22850 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
22852 __builtin_aarch64_simd_ci __o;
22853 uint64x2x3_t temp;
22854 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22855 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22856 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
22857 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22858 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22859 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22860 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22863 __extension__ static __inline void
22864 vst3_f64 (float64_t * __a, float64x1x3_t val)
22866 __builtin_aarch64_simd_ci __o;
22867 float64x2x3_t temp;
22868 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22869 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22870 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
22871 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
22872 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
22873 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
22874 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
22877 __extension__ static __inline void
22878 vst3_s8 (int8_t * __a, int8x8x3_t val)
22880 __builtin_aarch64_simd_ci __o;
22881 int8x16x3_t temp;
22882 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22883 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22884 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
22885 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22886 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22887 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22888 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22891 __extension__ static __inline void __attribute__ ((__always_inline__))
22892 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
22894 __builtin_aarch64_simd_ci __o;
22895 poly8x16x3_t temp;
22896 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22897 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22898 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
22899 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22900 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22901 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22902 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22905 __extension__ static __inline void __attribute__ ((__always_inline__))
22906 vst3_s16 (int16_t * __a, int16x4x3_t val)
22908 __builtin_aarch64_simd_ci __o;
22909 int16x8x3_t temp;
22910 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22911 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22912 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
22913 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22914 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22915 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22916 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22919 __extension__ static __inline void __attribute__ ((__always_inline__))
22920 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
22922 __builtin_aarch64_simd_ci __o;
22923 poly16x8x3_t temp;
22924 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22925 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22926 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
22927 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22928 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22929 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22930 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22933 __extension__ static __inline void __attribute__ ((__always_inline__))
22934 vst3_s32 (int32_t * __a, int32x2x3_t val)
22936 __builtin_aarch64_simd_ci __o;
22937 int32x4x3_t temp;
22938 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22939 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22940 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
22941 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22942 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22943 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
22944 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
22947 __extension__ static __inline void __attribute__ ((__always_inline__))
22948 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
22950 __builtin_aarch64_simd_ci __o;
22951 uint8x16x3_t temp;
22952 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22953 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22954 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
22955 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22956 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22957 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22958 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22961 __extension__ static __inline void __attribute__ ((__always_inline__))
22962 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
22964 __builtin_aarch64_simd_ci __o;
22965 uint16x8x3_t temp;
22966 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22967 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22968 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
22969 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22970 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22971 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22972 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22975 __extension__ static __inline void __attribute__ ((__always_inline__))
22976 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
22978 __builtin_aarch64_simd_ci __o;
22979 uint32x4x3_t temp;
22980 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22981 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22982 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
22983 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
22984 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
22985 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
22986 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
22989 __extension__ static __inline void __attribute__ ((__always_inline__))
22990 vst3_f32 (float32_t * __a, float32x2x3_t val)
22992 __builtin_aarch64_simd_ci __o;
22993 float32x4x3_t temp;
22994 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22995 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22996 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
22997 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
22998 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
22999 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23000 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23003 __extension__ static __inline void __attribute__ ((__always_inline__))
23004 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23006 __builtin_aarch64_simd_ci __o;
23007 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23008 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23009 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23010 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23013 __extension__ static __inline void __attribute__ ((__always_inline__))
23014 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23016 __builtin_aarch64_simd_ci __o;
23017 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23018 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23019 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23020 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23023 __extension__ static __inline void __attribute__ ((__always_inline__))
23024 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23026 __builtin_aarch64_simd_ci __o;
23027 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23028 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23029 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23030 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23033 __extension__ static __inline void __attribute__ ((__always_inline__))
23034 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23036 __builtin_aarch64_simd_ci __o;
23037 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23038 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23039 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23040 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23043 __extension__ static __inline void __attribute__ ((__always_inline__))
23044 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23046 __builtin_aarch64_simd_ci __o;
23047 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23048 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23049 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23050 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23053 __extension__ static __inline void __attribute__ ((__always_inline__))
23054 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23056 __builtin_aarch64_simd_ci __o;
23057 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23058 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23059 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23060 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23063 __extension__ static __inline void __attribute__ ((__always_inline__))
23064 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23066 __builtin_aarch64_simd_ci __o;
23067 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23068 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23069 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23070 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23073 __extension__ static __inline void __attribute__ ((__always_inline__))
23074 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23076 __builtin_aarch64_simd_ci __o;
23077 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23078 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23079 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23080 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23083 __extension__ static __inline void __attribute__ ((__always_inline__))
23084 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23086 __builtin_aarch64_simd_ci __o;
23087 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23088 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23089 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23090 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23093 __extension__ static __inline void __attribute__ ((__always_inline__))
23094 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23096 __builtin_aarch64_simd_ci __o;
23097 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23098 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23099 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23100 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23103 __extension__ static __inline void __attribute__ ((__always_inline__))
23104 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23106 __builtin_aarch64_simd_ci __o;
23107 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23108 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23109 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23110 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23113 __extension__ static __inline void __attribute__ ((__always_inline__))
23114 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23116 __builtin_aarch64_simd_ci __o;
23117 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23118 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23119 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23120 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23123 __extension__ static __inline void
23124 vst4_s64 (int64_t * __a, int64x1x4_t val)
23126 __builtin_aarch64_simd_xi __o;
23127 int64x2x4_t temp;
23128 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23129 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23130 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23131 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23132 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23133 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23134 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23135 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23136 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23139 __extension__ static __inline void
23140 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23142 __builtin_aarch64_simd_xi __o;
23143 uint64x2x4_t temp;
23144 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23145 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23146 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23147 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23148 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23149 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23150 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23151 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23152 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23155 __extension__ static __inline void
23156 vst4_f64 (float64_t * __a, float64x1x4_t val)
23158 __builtin_aarch64_simd_xi __o;
23159 float64x2x4_t temp;
23160 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23161 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23162 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23163 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23164 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23165 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23166 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23167 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23168 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23171 __extension__ static __inline void
23172 vst4_s8 (int8_t * __a, int8x8x4_t val)
23174 __builtin_aarch64_simd_xi __o;
23175 int8x16x4_t temp;
23176 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23177 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23178 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23179 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23180 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23181 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23182 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23183 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23184 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23187 __extension__ static __inline void __attribute__ ((__always_inline__))
23188 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23190 __builtin_aarch64_simd_xi __o;
23191 poly8x16x4_t temp;
23192 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23193 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23194 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23195 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23196 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23197 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23198 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23199 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23200 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23203 __extension__ static __inline void __attribute__ ((__always_inline__))
23204 vst4_s16 (int16_t * __a, int16x4x4_t val)
23206 __builtin_aarch64_simd_xi __o;
23207 int16x8x4_t temp;
23208 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23209 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23210 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23211 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23212 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23213 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23214 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23215 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23216 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23219 __extension__ static __inline void __attribute__ ((__always_inline__))
23220 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23222 __builtin_aarch64_simd_xi __o;
23223 poly16x8x4_t temp;
23224 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23225 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23226 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23227 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23228 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23229 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23230 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23231 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23232 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23235 __extension__ static __inline void __attribute__ ((__always_inline__))
23236 vst4_s32 (int32_t * __a, int32x2x4_t val)
23238 __builtin_aarch64_simd_xi __o;
23239 int32x4x4_t temp;
23240 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23241 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23242 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23243 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23244 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23245 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23246 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23247 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23248 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23251 __extension__ static __inline void __attribute__ ((__always_inline__))
23252 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23254 __builtin_aarch64_simd_xi __o;
23255 uint8x16x4_t temp;
23256 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23257 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23258 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23259 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23260 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23261 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23262 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23263 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23264 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23267 __extension__ static __inline void __attribute__ ((__always_inline__))
23268 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23270 __builtin_aarch64_simd_xi __o;
23271 uint16x8x4_t temp;
23272 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23273 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23274 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23275 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23276 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23277 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23278 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23279 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23280 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23283 __extension__ static __inline void __attribute__ ((__always_inline__))
23284 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23286 __builtin_aarch64_simd_xi __o;
23287 uint32x4x4_t temp;
23288 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23289 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23290 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23291 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23292 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23293 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23294 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23295 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23296 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
/* vst4_f32: store four float32x2_t vectors to __a with 4-way element
   interleaving.  Same widen-then-pack scheme as the other 64-bit vst4
   variants; no reinterpreting casts are needed since the set builtin
   here is the float flavour.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4_f32 (float32_t * __a, float32x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  float32x4x4_t temp;
  temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
  temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
  __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
}
/* vst4q_<s8|p8|s16|p16|s32|s64>: store four full Q-register vectors to
   __a with 4-way element interleaving.  Each variant packs its four
   inputs into one XI tuple via the matching set_qregxi builtin and then
   issues the st4 builtin for that element mode.  Poly variants cast to
   the signed lane type the builtin is declared with (bit-preserving).  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s8 (int8_t * __a, int8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s16 (int16_t * __a, int16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s32 (int32_t * __a, int32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_s64 (int64_t * __a, int64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}
/* vst4q_<u8|u16|u32|u64|f32|f64>: store four full Q-register vectors to
   __a with 4-way element interleaving.  Unsigned variants cast each
   input to the signed lane type the set_qregxi builtin is declared with
   (bit-preserving); float variants use the float-flavoured builtins.  */
__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f32 (float32_t * __a, float32x4x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
  __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
}

__extension__ static __inline void __attribute__ ((__always_inline__))
vst4q_f64 (float64_t * __a, float64x2x4_t val)
{
  __builtin_aarch64_simd_xi __o;
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
  __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
  __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
}
/* vsub */

/* vsubd_s64: scalar subtract of two signed 64-bit values.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  int64_t __difference = __a - __b;
  return __difference;
}

/* vsubd_u64: scalar subtract of two unsigned 64-bit values
   (wraps modulo 2^64, as C unsigned arithmetic does).  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  uint64_t __difference = __a - __b;
  return __difference;
}
/* vtbx1 */

/* vtbx1_<type>: single-register table lookup with fallback.  Lanes of
   __idx that are < 8 select from __tab (via vtbl1); out-of-range lanes
   keep the corresponding lane of __r.  The vclt mask drives a final
   bit-select between the two.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
{
  /* Compare as unsigned so negative signed indices count as out of range.  */
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (8));
  int8x8_t __tbl = vtbl1_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
  poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtbx3 */

/* vtbx3_<type>: three-register table lookup with fallback.  Identical
   scheme to vtbx1, but the valid index range is 0..23 (3 x 8 table
   lanes); out-of-range lanes keep the corresponding lane of __r.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
{
  /* Compare as unsigned so negative signed indices count as out of range.  */
  uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
			      vmov_n_u8 (24));
  int8x8_t __tbl = vtbl3_s8 (__tab, __idx);

  return vbsl_s8 (__mask, __tbl, __r);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);

  return vbsl_u8 (__mask, __tbl, __r);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
{
  uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
  poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);

  return vbsl_p8 (__mask, __tbl, __r);
}
/* vtrn */

/* vtrn1_<type> (64-bit variants): TRN1 — interleave the even-numbered
   architectural lanes of __a and __b.  Two shuffle-index sets are
   needed because __builtin_shuffle lane numbering differs between big-
   and little-endian, as the #ifdef pairs below show.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}
/* vtrn1q_<type>: Q-register TRN1 — interleave the even-numbered
   architectural lanes of __a and __b.  Index sets are per-endianness,
   mirroring the 64-bit vtrn1 variants above.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vtrn2_<type> (64-bit variants): TRN2 — interleave the odd-numbered
   architectural lanes of __a and __b.  Complement of vtrn1; index sets
   are per-endianness as in the #ifdef pairs below.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}
/* vtrn2q_<type>: Q-register TRN2 — interleave the odd-numbered
   architectural lanes of __a and __b.  Complement of vtrn1q; index
   sets are per-endianness as in the #ifdef pairs below.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
/* vtrn_<type>: combined transpose — returns the pair {vtrn1, vtrn2}
   of its arguments in a two-vector struct.  */
__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t a, float32x2_t b)
{
  return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
}

__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t a, poly8x8_t b)
{
  return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
}

__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t a, poly16x4_t b)
{
  return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
}

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t a, int8x8_t b)
{
  return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
}

__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t a, int16x4_t b)
{
  return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
}

__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t a, int32x2_t b)
{
  return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
}

__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t a, uint8x8_t b)
{
  return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
}

__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t a, uint16x4_t b)
{
  return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
}

__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t a, uint32x2_t b)
{
  return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
}
/* vtrnq_<type>: Q-register combined transpose — returns the pair
   {vtrn1q, vtrn2q} of its arguments in a two-vector struct.  */
__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t a, float32x4_t b)
{
  return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
}

__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t a, poly8x16_t b)
{
  return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
}

__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t a, poly16x8_t b)
{
  return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
}

__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t a, int8x16_t b)
{
  return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
}

__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t a, int16x8_t b)
{
  return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
}

__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t a, int32x4_t b)
{
  return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
}

__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t a, uint8x16_t b)
{
  return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
}

__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t a, uint16x8_t b)
{
  return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
}

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t a, uint32x4_t b)
{
  return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
}
/* vtst */

/* vtst_<type> (64-bit variants): per-lane "test bits" — each result
   lane is all-ones when (__a & __b) has any bit set in that lane, else
   zero.  Implemented with GCC vector-extension operators; the != 0
   comparison already yields -1/0 per lane.  The 64x1 variants index
   lane 0 explicitly because single-element comparisons are done in
   scalar code.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}
/* vtstq_<type>: Q-register per-lane "test bits" — all-ones lanes where
   (__a & __b) is non-zero, zero lanes elsewhere.  The 64-bit-lane
   variants compare against an explicit 64-bit zero constant.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return ((__a & __b) != 0);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return ((__a & __b) != __AARCH64_UINT64_C (0));
}
/* vtstd_s64: scalar 64-bit "test bits" — all-ones result when
   __a & __b has any bit set, zero otherwise.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  if ((__a & __b) != 0)
    return -1ll;
  return 0ll;
}

/* vtstd_u64: unsigned flavour of vtstd_s64.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  if ((__a & __b) != 0)
    return -1ll;
  return 0ll;
}
/* vuqadd */

/* vuqadd_<type> / vuqaddq_<type>: signed saturating accumulate of an
   unsigned value (SUQADD) — thin wrappers over the suqadd builtins.
   The _b/_h/_s/_d forms are the scalar (single-lane) variants; the
   64x1 vector form routes its one lane through the scalar builtin.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8_t __a, uint8_t __b)
{
  return __builtin_aarch64_suqaddqi_ssu (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16_t __a, uint16_t __b)
{
  return __builtin_aarch64_suqaddhi_ssu (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32_t __a, uint32_t __b)
{
  return __builtin_aarch64_suqaddsi_ssu (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64_t __a, uint64_t __b)
{
  return __builtin_aarch64_suqadddi_ssu (__a, __b);
}
/* Helper macros.  __DEFINTERLEAVE defines the combined two-result
   interleave intrinsic v<op>[q]_<suffix> in terms of the already
   defined v<op>1... (first half) and v<op>2... (second half)
   intrinsics, packing both results into a <type>x2_t structure.
   __INTERLEAVE_LIST instantiates it for every supported element
   type, in both D-register and Q-register forms.  Both macros are
   #undef'd at the end of this file.  */

#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)           \
  __extension__ static __inline rettype                               \
  __attribute__ ((__always_inline__))                                 \
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)                \
  {                                                                   \
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),    \
                      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};   \
  }

#define __INTERLEAVE_LIST(op)                                  \
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)       \
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)            \
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)         \
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)              \
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)           \
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)           \
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)            \
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)         \
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)         \
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)     \
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)        \
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)       \
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)          \
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)         \
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)         \
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)        \
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)       \
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp - unzip (de-interleave).  vuzp1 selects the even-numbered
   lanes of the concatenation of __a and __b (UZP1 instruction).
   On big-endian targets GCC numbers vector lanes in the opposite
   order, so a different shuffle mask is needed to produce the same
   architectural result.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vuzp2 selects the odd-numbered lanes of the concatenation of __a
   and __b (UZP2 instruction).  As with vuzp1, big-endian targets use
   a reversed shuffle mask to compensate for GCC's reversed lane
   numbering.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

/* Instantiate the combined vuzp[q]_<suffix> intrinsics from the
   vuzp1/vuzp2 halves defined above.  */
__INTERLEAVE_LIST (uzp)
/* vzip - zip (interleave).  vzip1 interleaves the elements from the
   low halves of __a and __b (ZIP1 instruction).  Big-endian targets
   use a reversed shuffle mask to compensate for GCC's reversed lane
   numbering.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {12, 4, 13, 5, 14, 6, 15, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vzip2 interleaves the elements from the high halves of __a and __b
   (ZIP2 instruction).  Big-endian targets use a reversed shuffle mask
   to compensate for GCC's reversed lane numbering.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vzip2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vzip2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vzip2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vzip2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vzip2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vzip2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vzip2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vzip2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vzip2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vzip2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t)
      {4, 12, 5, 13, 6, 14, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

/* Instantiate the combined vzip[q]_<suffix> intrinsics from the
   vzip1/vzip2 halves defined above.  */
__INTERLEAVE_LIST (zip)
/* Tear down the internal helper macros so they do not leak into user
   code; only the public v* intrinsics defined above remain visible.  */
#undef __INTERLEAVE_LIST
#undef __DEFINTERLEAVE

/* End of optimal implementations in approved order.  */

#undef __aarch64_vget_lane_any

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif