1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
/* Short vector types.  Each typedef carrying
   __attribute__ ((__vector_size__ (8))) is built from one of the
   __builtin_aarch64_simd_* element types; the signed, float, polynomial
   and unsigned variants appear in that order.  */
32 typedef __builtin_aarch64_simd_qi int8x8_t
33 __attribute__ ((__vector_size__ (8)));
34 typedef __builtin_aarch64_simd_hi int16x4_t
35 __attribute__ ((__vector_size__ (8)));
36 typedef __builtin_aarch64_simd_si int32x2_t
37 __attribute__ ((__vector_size__ (8)));
/* The signed x1 types and float64x1_t carry no vector attribute: they are
   plain aliases of the scalar types int64_t, int32_t, int16_t, int8_t and
   double.  */
38 typedef int64_t int64x1_t
;
39 typedef int32_t int32x1_t
;
40 typedef int16_t int16x1_t
;
41 typedef int8_t int8x1_t
;
42 typedef double float64x1_t
;
43 typedef __builtin_aarch64_simd_sf float32x2_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_poly8 poly8x8_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly16 poly16x4_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_uqi uint8x8_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uhi uint16x4_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_usi uint32x2_t
54 __attribute__ ((__vector_size__ (8)));
/* Unsigned x1 types: likewise plain aliases of the scalar types uint64_t,
   uint32_t, uint16_t and uint8_t, with no vector attribute.  */
55 typedef uint64_t uint64x1_t
;
56 typedef uint32_t uint32x1_t
;
57 typedef uint16_t uint16x1_t
;
58 typedef uint8_t uint8x1_t
;
/* Long ("q") vector types.  Every typedef here uses
   __attribute__ ((__vector_size__ (16))) over a __builtin_aarch64_simd_*
   element type.  Unlike the x1 types, the two-element 64-bit variants
   (int64x2_t, uint64x2_t, float64x2_t) are declared as vector types built
   from the di / udi / df element types.  */
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_uqi uint8x16_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uhi uint16x8_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_usi uint32x4_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_udi uint64x2_t
82 __attribute__ ((__vector_size__ (16)));
/* Scalar element types.  float32_t and float64_t alias the C types float
   and double; poly8_t and poly16_t alias the compiler-provided
   __builtin_aarch64_simd_poly8 / __builtin_aarch64_simd_poly16 types, the
   same element types used for the poly8x8_t / poly16x4_t vectors.  */
84 typedef float float32_t
;
85 typedef double float64_t
;
86 typedef __builtin_aarch64_simd_poly8 poly8_t
;
87 typedef __builtin_aarch64_simd_poly16 poly16_t
;
89 typedef struct int8x8x2_t
94 typedef struct int8x16x2_t
99 typedef struct int16x4x2_t
104 typedef struct int16x8x2_t
109 typedef struct int32x2x2_t
114 typedef struct int32x4x2_t
119 typedef struct int64x1x2_t
124 typedef struct int64x2x2_t
129 typedef struct uint8x8x2_t
134 typedef struct uint8x16x2_t
139 typedef struct uint16x4x2_t
144 typedef struct uint16x8x2_t
149 typedef struct uint32x2x2_t
154 typedef struct uint32x4x2_t
159 typedef struct uint64x1x2_t
164 typedef struct uint64x2x2_t
169 typedef struct float32x2x2_t
174 typedef struct float32x4x2_t
179 typedef struct float64x2x2_t
184 typedef struct float64x1x2_t
189 typedef struct poly8x8x2_t
194 typedef struct poly8x16x2_t
199 typedef struct poly16x4x2_t
204 typedef struct poly16x8x2_t
209 typedef struct int8x8x3_t
214 typedef struct int8x16x3_t
219 typedef struct int16x4x3_t
224 typedef struct int16x8x3_t
229 typedef struct int32x2x3_t
234 typedef struct int32x4x3_t
239 typedef struct int64x1x3_t
244 typedef struct int64x2x3_t
249 typedef struct uint8x8x3_t
254 typedef struct uint8x16x3_t
259 typedef struct uint16x4x3_t
264 typedef struct uint16x8x3_t
269 typedef struct uint32x2x3_t
274 typedef struct uint32x4x3_t
279 typedef struct uint64x1x3_t
284 typedef struct uint64x2x3_t
289 typedef struct float32x2x3_t
294 typedef struct float32x4x3_t
299 typedef struct float64x2x3_t
304 typedef struct float64x1x3_t
309 typedef struct poly8x8x3_t
314 typedef struct poly8x16x3_t
319 typedef struct poly16x4x3_t
324 typedef struct poly16x8x3_t
329 typedef struct int8x8x4_t
334 typedef struct int8x16x4_t
339 typedef struct int16x4x4_t
344 typedef struct int16x8x4_t
349 typedef struct int32x2x4_t
354 typedef struct int32x4x4_t
359 typedef struct int64x1x4_t
364 typedef struct int64x2x4_t
369 typedef struct uint8x8x4_t
374 typedef struct uint8x16x4_t
379 typedef struct uint16x4x4_t
384 typedef struct uint16x8x4_t
389 typedef struct uint32x2x4_t
394 typedef struct uint32x4x4_t
399 typedef struct uint64x1x4_t
404 typedef struct uint64x2x4_t
409 typedef struct float32x2x4_t
414 typedef struct float32x4x4_t
419 typedef struct float64x2x4_t
424 typedef struct float64x1x4_t
429 typedef struct poly8x8x4_t
434 typedef struct poly8x16x4_t
439 typedef struct poly16x4x4_t
444 typedef struct poly16x8x4_t
449 /* vget_lane internal macros. */
/* Common helper behind the __aarch64_vget_lane_* and
   __aarch64_vgetq_lane_* macros.

   __size      suffix pasted onto __builtin_aarch64_get_lane to select the
               builtin (callers pass v8qi, v4hi, v2si, v4sf, ...).
   __cast_ret  cast applied to the builtin's result; callers may leave it
               empty.
   __cast_a    cast applied to __a before it is handed to the builtin;
               callers may leave it empty.
   __a         vector operand.
   __b         second argument forwarded unchanged to the builtin (the
               lane selector).

   The expansion is wrapped in one pair of parentheses so that the result
   cast and the call form a single expression at the point of use.  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret								\
     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
/* Float forms.  f32 forwards to __aarch64_vget_lane_any with the v2sf
   suffix and both cast slots empty.  f64 expands to (__a) alone and never
   uses __b.  */
455 #define __aarch64_vget_lane_f32(__a, __b) \
456 __aarch64_vget_lane_any (v2sf, , , __a, __b)
457 #define __aarch64_vget_lane_f64(__a, __b) (__a)
/* Polynomial forms.  The operand is cast to the signed vector type
   (int8x8_t / int16x4_t) before the call and the result slot is given
   (poly8_t) / (poly16_t).  */
459 #define __aarch64_vget_lane_p8(__a, __b) \
460 __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
461 #define __aarch64_vget_lane_p16(__a, __b) \
462 __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)
/* Signed forms.  No casts are supplied; only the suffix differs.  s64
   expands to (__a) alone and never uses __b.  */
464 #define __aarch64_vget_lane_s8(__a, __b) \
465 __aarch64_vget_lane_any (v8qi, , ,__a, __b)
466 #define __aarch64_vget_lane_s16(__a, __b) \
467 __aarch64_vget_lane_any (v4hi, , ,__a, __b)
468 #define __aarch64_vget_lane_s32(__a, __b) \
469 __aarch64_vget_lane_any (v2si, , ,__a, __b)
470 #define __aarch64_vget_lane_s64(__a, __b) (__a)
/* Unsigned forms.  They reuse the same suffixes as the signed forms: the
   operand is cast to the signed vector type and the result slot is given
   the unsigned scalar type.  u64 expands to (__a) alone and never uses
   __b.  */
472 #define __aarch64_vget_lane_u8(__a, __b) \
473 __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
474 #define __aarch64_vget_lane_u16(__a, __b) \
475 __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
476 #define __aarch64_vget_lane_u32(__a, __b) \
477 __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
478 #define __aarch64_vget_lane_u64(__a, __b) (__a)
/* "q" float forms.  Both forward to __aarch64_vget_lane_any with empty
   cast slots, using the v4sf and v2df suffixes.  Unlike
   __aarch64_vget_lane_f64, the f64 form here does call the helper.  */
480 #define __aarch64_vgetq_lane_f32(__a, __b) \
481 __aarch64_vget_lane_any (v4sf, , , __a, __b)
482 #define __aarch64_vgetq_lane_f64(__a, __b) \
483 __aarch64_vget_lane_any (v2df, , , __a, __b)
/* "q" polynomial forms.  The operand is cast to int8x16_t / int16x8_t and
   the result slot is given (poly8_t) / (poly16_t).  */
485 #define __aarch64_vgetq_lane_p8(__a, __b) \
486 __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
487 #define __aarch64_vgetq_lane_p16(__a, __b) \
488 __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)
/* "q" signed forms.  No casts are supplied; the suffix selects the
   builtin.  */
490 #define __aarch64_vgetq_lane_s8(__a, __b) \
491 __aarch64_vget_lane_any (v16qi, , ,__a, __b)
492 #define __aarch64_vgetq_lane_s16(__a, __b) \
493 __aarch64_vget_lane_any (v8hi, , ,__a, __b)
494 #define __aarch64_vgetq_lane_s32(__a, __b) \
495 __aarch64_vget_lane_any (v4si, , ,__a, __b)
496 #define __aarch64_vgetq_lane_s64(__a, __b) \
497 __aarch64_vget_lane_any (v2di, , ,__a, __b)
/* "q" unsigned forms.  They reuse the signed suffixes: the operand is cast
   to the signed vector type and the result slot is given the unsigned
   scalar type.  */
499 #define __aarch64_vgetq_lane_u8(__a, __b) \
500 __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
501 #define __aarch64_vgetq_lane_u16(__a, __b) \
502 __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
503 #define __aarch64_vgetq_lane_u32(__a, __b) \
504 __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
505 #define __aarch64_vgetq_lane_u64(__a, __b) \
506 __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
509 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
510 vadd_s8 (int8x8_t __a
, int8x8_t __b
)
515 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
516 vadd_s16 (int16x4_t __a
, int16x4_t __b
)
521 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
522 vadd_s32 (int32x2_t __a
, int32x2_t __b
)
527 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
528 vadd_f32 (float32x2_t __a
, float32x2_t __b
)
533 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
534 vadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
539 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
540 vadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
545 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
546 vadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
551 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
552 vadd_s64 (int64x1_t __a
, int64x1_t __b
)
557 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
558 vadd_u64 (uint64x1_t __a
, uint64x1_t __b
)
563 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
564 vaddq_s8 (int8x16_t __a
, int8x16_t __b
)
569 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
570 vaddq_s16 (int16x8_t __a
, int16x8_t __b
)
575 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
576 vaddq_s32 (int32x4_t __a
, int32x4_t __b
)
581 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
582 vaddq_s64 (int64x2_t __a
, int64x2_t __b
)
587 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
588 vaddq_f32 (float32x4_t __a
, float32x4_t __b
)
593 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
594 vaddq_f64 (float64x2_t __a
, float64x2_t __b
)
599 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
600 vaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
605 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
606 vaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
611 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
612 vaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
617 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
618 vaddq_u64 (uint64x2_t __a
, uint64x2_t __b
)
623 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
624 vaddl_s8 (int8x8_t __a
, int8x8_t __b
)
626 return (int16x8_t
) __builtin_aarch64_saddlv8qi (__a
, __b
);
629 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
630 vaddl_s16 (int16x4_t __a
, int16x4_t __b
)
632 return (int32x4_t
) __builtin_aarch64_saddlv4hi (__a
, __b
);
635 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
636 vaddl_s32 (int32x2_t __a
, int32x2_t __b
)
638 return (int64x2_t
) __builtin_aarch64_saddlv2si (__a
, __b
);
641 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
642 vaddl_u8 (uint8x8_t __a
, uint8x8_t __b
)
644 return (uint16x8_t
) __builtin_aarch64_uaddlv8qi ((int8x8_t
) __a
,
648 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
649 vaddl_u16 (uint16x4_t __a
, uint16x4_t __b
)
651 return (uint32x4_t
) __builtin_aarch64_uaddlv4hi ((int16x4_t
) __a
,
655 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
656 vaddl_u32 (uint32x2_t __a
, uint32x2_t __b
)
658 return (uint64x2_t
) __builtin_aarch64_uaddlv2si ((int32x2_t
) __a
,
662 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
663 vaddl_high_s8 (int8x16_t __a
, int8x16_t __b
)
665 return (int16x8_t
) __builtin_aarch64_saddl2v16qi (__a
, __b
);
668 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
669 vaddl_high_s16 (int16x8_t __a
, int16x8_t __b
)
671 return (int32x4_t
) __builtin_aarch64_saddl2v8hi (__a
, __b
);
674 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
675 vaddl_high_s32 (int32x4_t __a
, int32x4_t __b
)
677 return (int64x2_t
) __builtin_aarch64_saddl2v4si (__a
, __b
);
680 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
681 vaddl_high_u8 (uint8x16_t __a
, uint8x16_t __b
)
683 return (uint16x8_t
) __builtin_aarch64_uaddl2v16qi ((int8x16_t
) __a
,
687 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
688 vaddl_high_u16 (uint16x8_t __a
, uint16x8_t __b
)
690 return (uint32x4_t
) __builtin_aarch64_uaddl2v8hi ((int16x8_t
) __a
,
694 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
695 vaddl_high_u32 (uint32x4_t __a
, uint32x4_t __b
)
697 return (uint64x2_t
) __builtin_aarch64_uaddl2v4si ((int32x4_t
) __a
,
701 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
702 vaddw_s8 (int16x8_t __a
, int8x8_t __b
)
704 return (int16x8_t
) __builtin_aarch64_saddwv8qi (__a
, __b
);
707 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
708 vaddw_s16 (int32x4_t __a
, int16x4_t __b
)
710 return (int32x4_t
) __builtin_aarch64_saddwv4hi (__a
, __b
);
713 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
714 vaddw_s32 (int64x2_t __a
, int32x2_t __b
)
716 return (int64x2_t
) __builtin_aarch64_saddwv2si (__a
, __b
);
719 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
720 vaddw_u8 (uint16x8_t __a
, uint8x8_t __b
)
722 return (uint16x8_t
) __builtin_aarch64_uaddwv8qi ((int16x8_t
) __a
,
726 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
727 vaddw_u16 (uint32x4_t __a
, uint16x4_t __b
)
729 return (uint32x4_t
) __builtin_aarch64_uaddwv4hi ((int32x4_t
) __a
,
733 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
734 vaddw_u32 (uint64x2_t __a
, uint32x2_t __b
)
736 return (uint64x2_t
) __builtin_aarch64_uaddwv2si ((int64x2_t
) __a
,
740 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
741 vaddw_high_s8 (int16x8_t __a
, int8x16_t __b
)
743 return (int16x8_t
) __builtin_aarch64_saddw2v16qi (__a
, __b
);
746 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
747 vaddw_high_s16 (int32x4_t __a
, int16x8_t __b
)
749 return (int32x4_t
) __builtin_aarch64_saddw2v8hi (__a
, __b
);
752 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
753 vaddw_high_s32 (int64x2_t __a
, int32x4_t __b
)
755 return (int64x2_t
) __builtin_aarch64_saddw2v4si (__a
, __b
);
758 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
759 vaddw_high_u8 (uint16x8_t __a
, uint8x16_t __b
)
761 return (uint16x8_t
) __builtin_aarch64_uaddw2v16qi ((int16x8_t
) __a
,
765 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
766 vaddw_high_u16 (uint32x4_t __a
, uint16x8_t __b
)
768 return (uint32x4_t
) __builtin_aarch64_uaddw2v8hi ((int32x4_t
) __a
,
772 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
773 vaddw_high_u32 (uint64x2_t __a
, uint32x4_t __b
)
775 return (uint64x2_t
) __builtin_aarch64_uaddw2v4si ((int64x2_t
) __a
,
779 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
780 vhadd_s8 (int8x8_t __a
, int8x8_t __b
)
782 return (int8x8_t
) __builtin_aarch64_shaddv8qi (__a
, __b
);
785 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
786 vhadd_s16 (int16x4_t __a
, int16x4_t __b
)
788 return (int16x4_t
) __builtin_aarch64_shaddv4hi (__a
, __b
);
791 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
792 vhadd_s32 (int32x2_t __a
, int32x2_t __b
)
794 return (int32x2_t
) __builtin_aarch64_shaddv2si (__a
, __b
);
797 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
798 vhadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
800 return (uint8x8_t
) __builtin_aarch64_uhaddv8qi ((int8x8_t
) __a
,
804 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
805 vhadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
807 return (uint16x4_t
) __builtin_aarch64_uhaddv4hi ((int16x4_t
) __a
,
811 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
812 vhadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
814 return (uint32x2_t
) __builtin_aarch64_uhaddv2si ((int32x2_t
) __a
,
818 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
819 vhaddq_s8 (int8x16_t __a
, int8x16_t __b
)
821 return (int8x16_t
) __builtin_aarch64_shaddv16qi (__a
, __b
);
824 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
825 vhaddq_s16 (int16x8_t __a
, int16x8_t __b
)
827 return (int16x8_t
) __builtin_aarch64_shaddv8hi (__a
, __b
);
830 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
831 vhaddq_s32 (int32x4_t __a
, int32x4_t __b
)
833 return (int32x4_t
) __builtin_aarch64_shaddv4si (__a
, __b
);
836 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
837 vhaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
839 return (uint8x16_t
) __builtin_aarch64_uhaddv16qi ((int8x16_t
) __a
,
843 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
844 vhaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
846 return (uint16x8_t
) __builtin_aarch64_uhaddv8hi ((int16x8_t
) __a
,
850 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
851 vhaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
853 return (uint32x4_t
) __builtin_aarch64_uhaddv4si ((int32x4_t
) __a
,
857 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
858 vrhadd_s8 (int8x8_t __a
, int8x8_t __b
)
860 return (int8x8_t
) __builtin_aarch64_srhaddv8qi (__a
, __b
);
863 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
864 vrhadd_s16 (int16x4_t __a
, int16x4_t __b
)
866 return (int16x4_t
) __builtin_aarch64_srhaddv4hi (__a
, __b
);
869 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
870 vrhadd_s32 (int32x2_t __a
, int32x2_t __b
)
872 return (int32x2_t
) __builtin_aarch64_srhaddv2si (__a
, __b
);
875 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
876 vrhadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
878 return (uint8x8_t
) __builtin_aarch64_urhaddv8qi ((int8x8_t
) __a
,
882 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
883 vrhadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
885 return (uint16x4_t
) __builtin_aarch64_urhaddv4hi ((int16x4_t
) __a
,
889 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
890 vrhadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
892 return (uint32x2_t
) __builtin_aarch64_urhaddv2si ((int32x2_t
) __a
,
896 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
897 vrhaddq_s8 (int8x16_t __a
, int8x16_t __b
)
899 return (int8x16_t
) __builtin_aarch64_srhaddv16qi (__a
, __b
);
902 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
903 vrhaddq_s16 (int16x8_t __a
, int16x8_t __b
)
905 return (int16x8_t
) __builtin_aarch64_srhaddv8hi (__a
, __b
);
908 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
909 vrhaddq_s32 (int32x4_t __a
, int32x4_t __b
)
911 return (int32x4_t
) __builtin_aarch64_srhaddv4si (__a
, __b
);
914 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
915 vrhaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
917 return (uint8x16_t
) __builtin_aarch64_urhaddv16qi ((int8x16_t
) __a
,
921 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
922 vrhaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
924 return (uint16x8_t
) __builtin_aarch64_urhaddv8hi ((int16x8_t
) __a
,
928 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
929 vrhaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
931 return (uint32x4_t
) __builtin_aarch64_urhaddv4si ((int32x4_t
) __a
,
935 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
936 vaddhn_s16 (int16x8_t __a
, int16x8_t __b
)
938 return (int8x8_t
) __builtin_aarch64_addhnv8hi (__a
, __b
);
941 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
942 vaddhn_s32 (int32x4_t __a
, int32x4_t __b
)
944 return (int16x4_t
) __builtin_aarch64_addhnv4si (__a
, __b
);
947 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
948 vaddhn_s64 (int64x2_t __a
, int64x2_t __b
)
950 return (int32x2_t
) __builtin_aarch64_addhnv2di (__a
, __b
);
953 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
954 vaddhn_u16 (uint16x8_t __a
, uint16x8_t __b
)
956 return (uint8x8_t
) __builtin_aarch64_addhnv8hi ((int16x8_t
) __a
,
960 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
961 vaddhn_u32 (uint32x4_t __a
, uint32x4_t __b
)
963 return (uint16x4_t
) __builtin_aarch64_addhnv4si ((int32x4_t
) __a
,
967 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
968 vaddhn_u64 (uint64x2_t __a
, uint64x2_t __b
)
970 return (uint32x2_t
) __builtin_aarch64_addhnv2di ((int64x2_t
) __a
,
974 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
975 vraddhn_s16 (int16x8_t __a
, int16x8_t __b
)
977 return (int8x8_t
) __builtin_aarch64_raddhnv8hi (__a
, __b
);
980 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
981 vraddhn_s32 (int32x4_t __a
, int32x4_t __b
)
983 return (int16x4_t
) __builtin_aarch64_raddhnv4si (__a
, __b
);
986 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
987 vraddhn_s64 (int64x2_t __a
, int64x2_t __b
)
989 return (int32x2_t
) __builtin_aarch64_raddhnv2di (__a
, __b
);
992 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
993 vraddhn_u16 (uint16x8_t __a
, uint16x8_t __b
)
995 return (uint8x8_t
) __builtin_aarch64_raddhnv8hi ((int16x8_t
) __a
,
999 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1000 vraddhn_u32 (uint32x4_t __a
, uint32x4_t __b
)
1002 return (uint16x4_t
) __builtin_aarch64_raddhnv4si ((int32x4_t
) __a
,
1006 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1007 vraddhn_u64 (uint64x2_t __a
, uint64x2_t __b
)
1009 return (uint32x2_t
) __builtin_aarch64_raddhnv2di ((int64x2_t
) __a
,
1013 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1014 vaddhn_high_s16 (int8x8_t __a
, int16x8_t __b
, int16x8_t __c
)
1016 return (int8x16_t
) __builtin_aarch64_addhn2v8hi (__a
, __b
, __c
);
1019 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1020 vaddhn_high_s32 (int16x4_t __a
, int32x4_t __b
, int32x4_t __c
)
1022 return (int16x8_t
) __builtin_aarch64_addhn2v4si (__a
, __b
, __c
);
1025 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1026 vaddhn_high_s64 (int32x2_t __a
, int64x2_t __b
, int64x2_t __c
)
1028 return (int32x4_t
) __builtin_aarch64_addhn2v2di (__a
, __b
, __c
);
1031 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1032 vaddhn_high_u16 (uint8x8_t __a
, uint16x8_t __b
, uint16x8_t __c
)
1034 return (uint8x16_t
) __builtin_aarch64_addhn2v8hi ((int8x8_t
) __a
,
1039 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1040 vaddhn_high_u32 (uint16x4_t __a
, uint32x4_t __b
, uint32x4_t __c
)
1042 return (uint16x8_t
) __builtin_aarch64_addhn2v4si ((int16x4_t
) __a
,
1047 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1048 vaddhn_high_u64 (uint32x2_t __a
, uint64x2_t __b
, uint64x2_t __c
)
1050 return (uint32x4_t
) __builtin_aarch64_addhn2v2di ((int32x2_t
) __a
,
1055 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1056 vraddhn_high_s16 (int8x8_t __a
, int16x8_t __b
, int16x8_t __c
)
1058 return (int8x16_t
) __builtin_aarch64_raddhn2v8hi (__a
, __b
, __c
);
1061 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1062 vraddhn_high_s32 (int16x4_t __a
, int32x4_t __b
, int32x4_t __c
)
1064 return (int16x8_t
) __builtin_aarch64_raddhn2v4si (__a
, __b
, __c
);
1067 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1068 vraddhn_high_s64 (int32x2_t __a
, int64x2_t __b
, int64x2_t __c
)
1070 return (int32x4_t
) __builtin_aarch64_raddhn2v2di (__a
, __b
, __c
);
1073 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1074 vraddhn_high_u16 (uint8x8_t __a
, uint16x8_t __b
, uint16x8_t __c
)
1076 return (uint8x16_t
) __builtin_aarch64_raddhn2v8hi ((int8x8_t
) __a
,
1081 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1082 vraddhn_high_u32 (uint16x4_t __a
, uint32x4_t __b
, uint32x4_t __c
)
1084 return (uint16x8_t
) __builtin_aarch64_raddhn2v4si ((int16x4_t
) __a
,
1089 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1090 vraddhn_high_u64 (uint32x2_t __a
, uint64x2_t __b
, uint64x2_t __c
)
1092 return (uint32x4_t
) __builtin_aarch64_raddhn2v2di ((int32x2_t
) __a
,
1097 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1098 vdiv_f32 (float32x2_t __a
, float32x2_t __b
)
1103 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1104 vdivq_f32 (float32x4_t __a
, float32x4_t __b
)
1109 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1110 vdivq_f64 (float64x2_t __a
, float64x2_t __b
)
1115 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1116 vmul_s8 (int8x8_t __a
, int8x8_t __b
)
1121 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1122 vmul_s16 (int16x4_t __a
, int16x4_t __b
)
1127 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1128 vmul_s32 (int32x2_t __a
, int32x2_t __b
)
1133 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1134 vmul_f32 (float32x2_t __a
, float32x2_t __b
)
1139 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1140 vmul_u8 (uint8x8_t __a
, uint8x8_t __b
)
1145 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1146 vmul_u16 (uint16x4_t __a
, uint16x4_t __b
)
1151 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1152 vmul_u32 (uint32x2_t __a
, uint32x2_t __b
)
1157 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
1158 vmul_p8 (poly8x8_t __a
, poly8x8_t __b
)
1160 return (poly8x8_t
) __builtin_aarch64_pmulv8qi ((int8x8_t
) __a
,
1164 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1165 vmulq_s8 (int8x16_t __a
, int8x16_t __b
)
1170 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1171 vmulq_s16 (int16x8_t __a
, int16x8_t __b
)
1176 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1177 vmulq_s32 (int32x4_t __a
, int32x4_t __b
)
1182 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1183 vmulq_f32 (float32x4_t __a
, float32x4_t __b
)
1188 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1189 vmulq_f64 (float64x2_t __a
, float64x2_t __b
)
1194 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1195 vmulq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1200 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1201 vmulq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1206 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1207 vmulq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1212 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
1213 vmulq_p8 (poly8x16_t __a
, poly8x16_t __b
)
1215 return (poly8x16_t
) __builtin_aarch64_pmulv16qi ((int8x16_t
) __a
,
1219 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1220 vand_s8 (int8x8_t __a
, int8x8_t __b
)
1225 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1226 vand_s16 (int16x4_t __a
, int16x4_t __b
)
1231 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1232 vand_s32 (int32x2_t __a
, int32x2_t __b
)
1237 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1238 vand_u8 (uint8x8_t __a
, uint8x8_t __b
)
1243 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1244 vand_u16 (uint16x4_t __a
, uint16x4_t __b
)
1249 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1250 vand_u32 (uint32x2_t __a
, uint32x2_t __b
)
1255 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1256 vand_s64 (int64x1_t __a
, int64x1_t __b
)
1261 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1262 vand_u64 (uint64x1_t __a
, uint64x1_t __b
)
1267 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1268 vandq_s8 (int8x16_t __a
, int8x16_t __b
)
1273 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1274 vandq_s16 (int16x8_t __a
, int16x8_t __b
)
1279 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1280 vandq_s32 (int32x4_t __a
, int32x4_t __b
)
1285 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1286 vandq_s64 (int64x2_t __a
, int64x2_t __b
)
1291 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1292 vandq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1297 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1298 vandq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1303 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1304 vandq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1309 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1310 vandq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1315 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1316 vorr_s8 (int8x8_t __a
, int8x8_t __b
)
1321 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1322 vorr_s16 (int16x4_t __a
, int16x4_t __b
)
1327 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1328 vorr_s32 (int32x2_t __a
, int32x2_t __b
)
1333 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1334 vorr_u8 (uint8x8_t __a
, uint8x8_t __b
)
1339 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1340 vorr_u16 (uint16x4_t __a
, uint16x4_t __b
)
1345 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1346 vorr_u32 (uint32x2_t __a
, uint32x2_t __b
)
1351 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1352 vorr_s64 (int64x1_t __a
, int64x1_t __b
)
1357 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1358 vorr_u64 (uint64x1_t __a
, uint64x1_t __b
)
1363 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1364 vorrq_s8 (int8x16_t __a
, int8x16_t __b
)
1369 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1370 vorrq_s16 (int16x8_t __a
, int16x8_t __b
)
1375 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1376 vorrq_s32 (int32x4_t __a
, int32x4_t __b
)
1381 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1382 vorrq_s64 (int64x2_t __a
, int64x2_t __b
)
/* vorrq_u8: always-inline intrinsic taking two uint8x16_t operands and
   returning uint8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise OR of __a and __b -- confirm.  */
1387 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1388 vorrq_u8 (uint8x16_t __a
, uint8x16_t __b
)
/* vorrq_u16: always-inline intrinsic taking two uint16x8_t operands and
   returning uint16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise OR of __a and __b -- confirm.  */
1393 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1394 vorrq_u16 (uint16x8_t __a
, uint16x8_t __b
)
/* vorrq_u32: always-inline intrinsic taking two uint32x4_t operands and
   returning uint32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise OR of __a and __b -- confirm.  */
1399 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1400 vorrq_u32 (uint32x4_t __a
, uint32x4_t __b
)
/* vorrq_u64: always-inline intrinsic taking two uint64x2_t operands and
   returning uint64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise OR of __a and __b -- confirm.  */
1405 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1406 vorrq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* veor_s8: always-inline intrinsic taking two int8x8_t operands and
   returning int8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1411 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1412 veor_s8 (int8x8_t __a
, int8x8_t __b
)
/* veor_s16: always-inline intrinsic taking two int16x4_t operands and
   returning int16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1417 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1418 veor_s16 (int16x4_t __a
, int16x4_t __b
)
/* veor_s32: always-inline intrinsic taking two int32x2_t operands and
   returning int32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1423 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1424 veor_s32 (int32x2_t __a
, int32x2_t __b
)
/* veor_u8: always-inline intrinsic taking two uint8x8_t operands and
   returning uint8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1429 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1430 veor_u8 (uint8x8_t __a
, uint8x8_t __b
)
/* veor_u16: always-inline intrinsic taking two uint16x4_t operands and
   returning uint16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1435 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1436 veor_u16 (uint16x4_t __a
, uint16x4_t __b
)
/* veor_u32: always-inline intrinsic taking two uint32x2_t operands and
   returning uint32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1441 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1442 veor_u32 (uint32x2_t __a
, uint32x2_t __b
)
/* veor_s64: always-inline intrinsic taking two int64x1_t operands and
   returning int64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1447 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1448 veor_s64 (int64x1_t __a
, int64x1_t __b
)
/* veor_u64: always-inline intrinsic taking two uint64x1_t operands and
   returning uint64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1453 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1454 veor_u64 (uint64x1_t __a
, uint64x1_t __b
)
/* veorq_s8: always-inline intrinsic taking two int8x16_t operands and
   returning int8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1459 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1460 veorq_s8 (int8x16_t __a
, int8x16_t __b
)
/* veorq_s16: always-inline intrinsic taking two int16x8_t operands and
   returning int16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1465 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1466 veorq_s16 (int16x8_t __a
, int16x8_t __b
)
/* veorq_s32: always-inline intrinsic taking two int32x4_t operands and
   returning int32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1471 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1472 veorq_s32 (int32x4_t __a
, int32x4_t __b
)
/* veorq_s64: always-inline intrinsic taking two int64x2_t operands and
   returning int64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1477 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1478 veorq_s64 (int64x2_t __a
, int64x2_t __b
)
/* veorq_u8: always-inline intrinsic taking two uint8x16_t operands and
   returning uint8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1483 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1484 veorq_u8 (uint8x16_t __a
, uint8x16_t __b
)
/* veorq_u16: always-inline intrinsic taking two uint16x8_t operands and
   returning uint16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1489 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1490 veorq_u16 (uint16x8_t __a
, uint16x8_t __b
)
/* veorq_u32: always-inline intrinsic taking two uint32x4_t operands and
   returning uint32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1495 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1496 veorq_u32 (uint32x4_t __a
, uint32x4_t __b
)
/* veorq_u64: always-inline intrinsic taking two uint64x2_t operands and
   returning uint64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably the bitwise exclusive OR of __a and __b -- confirm.  */
1501 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1502 veorq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* vbic_s8: always-inline intrinsic taking two int8x8_t operands and
   returning int8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1507 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1508 vbic_s8 (int8x8_t __a
, int8x8_t __b
)
/* vbic_s16: always-inline intrinsic taking two int16x4_t operands and
   returning int16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1513 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1514 vbic_s16 (int16x4_t __a
, int16x4_t __b
)
/* vbic_s32: always-inline intrinsic taking two int32x2_t operands and
   returning int32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1519 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1520 vbic_s32 (int32x2_t __a
, int32x2_t __b
)
/* vbic_u8: always-inline intrinsic taking two uint8x8_t operands and
   returning uint8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1525 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1526 vbic_u8 (uint8x8_t __a
, uint8x8_t __b
)
/* vbic_u16: always-inline intrinsic taking two uint16x4_t operands and
   returning uint16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1531 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1532 vbic_u16 (uint16x4_t __a
, uint16x4_t __b
)
/* vbic_u32: always-inline intrinsic taking two uint32x2_t operands and
   returning uint32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1537 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1538 vbic_u32 (uint32x2_t __a
, uint32x2_t __b
)
/* vbic_s64: always-inline intrinsic taking two int64x1_t operands and
   returning int64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1543 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1544 vbic_s64 (int64x1_t __a
, int64x1_t __b
)
/* vbic_u64: always-inline intrinsic taking two uint64x1_t operands and
   returning uint64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1549 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1550 vbic_u64 (uint64x1_t __a
, uint64x1_t __b
)
/* vbicq_s8: always-inline intrinsic taking two int8x16_t operands and
   returning int8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1555 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1556 vbicq_s8 (int8x16_t __a
, int8x16_t __b
)
/* vbicq_s16: always-inline intrinsic taking two int16x8_t operands and
   returning int16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1561 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1562 vbicq_s16 (int16x8_t __a
, int16x8_t __b
)
/* vbicq_s32: always-inline intrinsic taking two int32x4_t operands and
   returning int32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1567 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1568 vbicq_s32 (int32x4_t __a
, int32x4_t __b
)
/* vbicq_s64: always-inline intrinsic taking two int64x2_t operands and
   returning int64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1573 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1574 vbicq_s64 (int64x2_t __a
, int64x2_t __b
)
/* vbicq_u8: always-inline intrinsic taking two uint8x16_t operands and
   returning uint8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1579 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1580 vbicq_u8 (uint8x16_t __a
, uint8x16_t __b
)
/* vbicq_u16: always-inline intrinsic taking two uint16x8_t operands and
   returning uint16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1585 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1586 vbicq_u16 (uint16x8_t __a
, uint16x8_t __b
)
/* vbicq_u32: always-inline intrinsic taking two uint32x4_t operands and
   returning uint32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1591 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1592 vbicq_u32 (uint32x4_t __a
, uint32x4_t __b
)
/* vbicq_u64: always-inline intrinsic taking two uint64x2_t operands and
   returning uint64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a AND NOT __b (bit clear) -- confirm.  */
1597 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1598 vbicq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* vorn_s8: always-inline intrinsic taking two int8x8_t operands and
   returning int8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1603 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1604 vorn_s8 (int8x8_t __a
, int8x8_t __b
)
/* vorn_s16: always-inline intrinsic taking two int16x4_t operands and
   returning int16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1609 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1610 vorn_s16 (int16x4_t __a
, int16x4_t __b
)
/* vorn_s32: always-inline intrinsic taking two int32x2_t operands and
   returning int32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1615 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1616 vorn_s32 (int32x2_t __a
, int32x2_t __b
)
/* vorn_u8: always-inline intrinsic taking two uint8x8_t operands and
   returning uint8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1621 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1622 vorn_u8 (uint8x8_t __a
, uint8x8_t __b
)
/* vorn_u16: always-inline intrinsic taking two uint16x4_t operands and
   returning uint16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1627 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1628 vorn_u16 (uint16x4_t __a
, uint16x4_t __b
)
/* vorn_u32: always-inline intrinsic taking two uint32x2_t operands and
   returning uint32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1633 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1634 vorn_u32 (uint32x2_t __a
, uint32x2_t __b
)
/* vorn_s64: always-inline intrinsic taking two int64x1_t operands and
   returning int64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1639 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1640 vorn_s64 (int64x1_t __a
, int64x1_t __b
)
/* vorn_u64: always-inline intrinsic taking two uint64x1_t operands and
   returning uint64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1645 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1646 vorn_u64 (uint64x1_t __a
, uint64x1_t __b
)
/* vornq_s8: always-inline intrinsic taking two int8x16_t operands and
   returning int8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1651 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1652 vornq_s8 (int8x16_t __a
, int8x16_t __b
)
/* vornq_s16: always-inline intrinsic taking two int16x8_t operands and
   returning int16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1657 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1658 vornq_s16 (int16x8_t __a
, int16x8_t __b
)
/* vornq_s32: always-inline intrinsic taking two int32x4_t operands and
   returning int32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1663 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1664 vornq_s32 (int32x4_t __a
, int32x4_t __b
)
/* vornq_s64: always-inline intrinsic taking two int64x2_t operands and
   returning int64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1669 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1670 vornq_s64 (int64x2_t __a
, int64x2_t __b
)
/* vornq_u8: always-inline intrinsic taking two uint8x16_t operands and
   returning uint8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1675 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1676 vornq_u8 (uint8x16_t __a
, uint8x16_t __b
)
/* vornq_u16: always-inline intrinsic taking two uint16x8_t operands and
   returning uint16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1681 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1682 vornq_u16 (uint16x8_t __a
, uint16x8_t __b
)
/* vornq_u32: always-inline intrinsic taking two uint32x4_t operands and
   returning uint32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1687 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1688 vornq_u32 (uint32x4_t __a
, uint32x4_t __b
)
/* vornq_u64: always-inline intrinsic taking two uint64x2_t operands and
   returning uint64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a OR NOT __b -- confirm.  */
1693 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1694 vornq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* vsub_s8: always-inline intrinsic taking two int8x8_t operands and
   returning int8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1699 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1700 vsub_s8 (int8x8_t __a
, int8x8_t __b
)
/* vsub_s16: always-inline intrinsic taking two int16x4_t operands and
   returning int16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1705 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1706 vsub_s16 (int16x4_t __a
, int16x4_t __b
)
/* vsub_s32: always-inline intrinsic taking two int32x2_t operands and
   returning int32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1711 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1712 vsub_s32 (int32x2_t __a
, int32x2_t __b
)
/* vsub_f32: always-inline intrinsic taking two float32x2_t operands and
   returning float32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1717 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1718 vsub_f32 (float32x2_t __a
, float32x2_t __b
)
/* vsub_u8: always-inline intrinsic taking two uint8x8_t operands and
   returning uint8x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1723 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1724 vsub_u8 (uint8x8_t __a
, uint8x8_t __b
)
/* vsub_u16: always-inline intrinsic taking two uint16x4_t operands and
   returning uint16x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1729 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1730 vsub_u16 (uint16x4_t __a
, uint16x4_t __b
)
/* vsub_u32: always-inline intrinsic taking two uint32x2_t operands and
   returning uint32x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1735 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1736 vsub_u32 (uint32x2_t __a
, uint32x2_t __b
)
/* vsub_s64: always-inline intrinsic taking two int64x1_t operands and
   returning int64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1741 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1742 vsub_s64 (int64x1_t __a
, int64x1_t __b
)
/* vsub_u64: always-inline intrinsic taking two uint64x1_t operands and
   returning uint64x1_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1747 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1748 vsub_u64 (uint64x1_t __a
, uint64x1_t __b
)
/* vsubq_s8: always-inline intrinsic taking two int8x16_t operands and
   returning int8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1753 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1754 vsubq_s8 (int8x16_t __a
, int8x16_t __b
)
/* vsubq_s16: always-inline intrinsic taking two int16x8_t operands and
   returning int16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1759 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1760 vsubq_s16 (int16x8_t __a
, int16x8_t __b
)
/* vsubq_s32: always-inline intrinsic taking two int32x4_t operands and
   returning int32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1765 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1766 vsubq_s32 (int32x4_t __a
, int32x4_t __b
)
/* vsubq_s64: always-inline intrinsic taking two int64x2_t operands and
   returning int64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1771 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1772 vsubq_s64 (int64x2_t __a
, int64x2_t __b
)
/* vsubq_f32: always-inline intrinsic taking two float32x4_t operands and
   returning float32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1777 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1778 vsubq_f32 (float32x4_t __a
, float32x4_t __b
)
/* vsubq_f64: always-inline intrinsic taking two float64x2_t operands and
   returning float64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1783 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1784 vsubq_f64 (float64x2_t __a
, float64x2_t __b
)
/* vsubq_u8: always-inline intrinsic taking two uint8x16_t operands and
   returning uint8x16_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1789 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1790 vsubq_u8 (uint8x16_t __a
, uint8x16_t __b
)
/* vsubq_u16: always-inline intrinsic taking two uint16x8_t operands and
   returning uint16x8_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1795 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1796 vsubq_u16 (uint16x8_t __a
, uint16x8_t __b
)
/* vsubq_u32: always-inline intrinsic taking two uint32x4_t operands and
   returning uint32x4_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1801 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1802 vsubq_u32 (uint32x4_t __a
, uint32x4_t __b
)
/* vsubq_u64: always-inline intrinsic taking two uint64x2_t operands and
   returning uint64x2_t.  NOTE(review): the body is not visible in this
   extract; presumably __a minus __b -- confirm.  */
1807 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1808 vsubq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* vsubl_s8: passes __a and __b to __builtin_aarch64_ssublv8qi and returns
   the result cast to int16x8_t.  */
1813 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1814 vsubl_s8 (int8x8_t __a
, int8x8_t __b
)
1816 return (int16x8_t
) __builtin_aarch64_ssublv8qi (__a
, __b
);
/* vsubl_s16: passes __a and __b to __builtin_aarch64_ssublv4hi and returns
   the result cast to int32x4_t.  */
1819 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1820 vsubl_s16 (int16x4_t __a
, int16x4_t __b
)
1822 return (int32x4_t
) __builtin_aarch64_ssublv4hi (__a
, __b
);
/* vsubl_s32: passes __a and __b to __builtin_aarch64_ssublv2si and returns
   the result cast to int64x2_t.  */
1825 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1826 vsubl_s32 (int32x2_t __a
, int32x2_t __b
)
1828 return (int64x2_t
) __builtin_aarch64_ssublv2si (__a
, __b
);
/* vsubl_u8: calls __builtin_aarch64_usublv8qi with __a cast to int8x8_t and
   casts the result to uint16x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x8_t) __b -- confirm.  */
1831 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1832 vsubl_u8 (uint8x8_t __a
, uint8x8_t __b
)
1834 return (uint16x8_t
) __builtin_aarch64_usublv8qi ((int8x8_t
) __a
,
/* vsubl_u16: calls __builtin_aarch64_usublv4hi with __a cast to int16x4_t and
   casts the result to uint32x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x4_t) __b -- confirm.  */
1838 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1839 vsubl_u16 (uint16x4_t __a
, uint16x4_t __b
)
1841 return (uint32x4_t
) __builtin_aarch64_usublv4hi ((int16x4_t
) __a
,
/* vsubl_u32: calls __builtin_aarch64_usublv2si with __a cast to int32x2_t and
   casts the result to uint64x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x2_t) __b -- confirm.  */
1845 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1846 vsubl_u32 (uint32x2_t __a
, uint32x2_t __b
)
1848 return (uint64x2_t
) __builtin_aarch64_usublv2si ((int32x2_t
) __a
,
/* vsubl_high_s8: passes __a and __b to __builtin_aarch64_ssubl2v16qi and
   returns the result cast to int16x8_t.  */
1852 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1853 vsubl_high_s8 (int8x16_t __a
, int8x16_t __b
)
1855 return (int16x8_t
) __builtin_aarch64_ssubl2v16qi (__a
, __b
);
/* vsubl_high_s16: passes __a and __b to __builtin_aarch64_ssubl2v8hi and
   returns the result cast to int32x4_t.  */
1858 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1859 vsubl_high_s16 (int16x8_t __a
, int16x8_t __b
)
1861 return (int32x4_t
) __builtin_aarch64_ssubl2v8hi (__a
, __b
);
/* vsubl_high_s32: passes __a and __b to __builtin_aarch64_ssubl2v4si and
   returns the result cast to int64x2_t.  */
1864 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1865 vsubl_high_s32 (int32x4_t __a
, int32x4_t __b
)
1867 return (int64x2_t
) __builtin_aarch64_ssubl2v4si (__a
, __b
);
/* vsubl_high_u8: calls __builtin_aarch64_usubl2v16qi with __a cast to
   int8x16_t and casts the result to uint16x8_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int8x16_t) __b -- confirm.  */
1870 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1871 vsubl_high_u8 (uint8x16_t __a
, uint8x16_t __b
)
1873 return (uint16x8_t
) __builtin_aarch64_usubl2v16qi ((int8x16_t
) __a
,
/* vsubl_high_u16: calls __builtin_aarch64_usubl2v8hi with __a cast to
   int16x8_t and casts the result to uint32x4_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int16x8_t) __b -- confirm.  */
1877 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1878 vsubl_high_u16 (uint16x8_t __a
, uint16x8_t __b
)
1880 return (uint32x4_t
) __builtin_aarch64_usubl2v8hi ((int16x8_t
) __a
,
/* vsubl_high_u32: calls __builtin_aarch64_usubl2v4si with __a cast to
   int32x4_t and casts the result to uint64x2_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int32x4_t) __b -- confirm.  */
1884 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1885 vsubl_high_u32 (uint32x4_t __a
, uint32x4_t __b
)
1887 return (uint64x2_t
) __builtin_aarch64_usubl2v4si ((int32x4_t
) __a
,
/* vsubw_s8: passes the int16x8_t __a and the int8x8_t __b to
   __builtin_aarch64_ssubwv8qi and returns the result cast to int16x8_t.  */
1891 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1892 vsubw_s8 (int16x8_t __a
, int8x8_t __b
)
1894 return (int16x8_t
) __builtin_aarch64_ssubwv8qi (__a
, __b
);
/* vsubw_s16: passes the int32x4_t __a and the int16x4_t __b to
   __builtin_aarch64_ssubwv4hi and returns the result cast to int32x4_t.  */
1897 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1898 vsubw_s16 (int32x4_t __a
, int16x4_t __b
)
1900 return (int32x4_t
) __builtin_aarch64_ssubwv4hi (__a
, __b
);
/* vsubw_s32: passes the int64x2_t __a and the int32x2_t __b to
   __builtin_aarch64_ssubwv2si and returns the result cast to int64x2_t.  */
1903 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1904 vsubw_s32 (int64x2_t __a
, int32x2_t __b
)
1906 return (int64x2_t
) __builtin_aarch64_ssubwv2si (__a
, __b
);
/* vsubw_u8: calls __builtin_aarch64_usubwv8qi with __a cast to int16x8_t and
   casts the result to uint16x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x8_t) __b -- confirm.  */
1909 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1910 vsubw_u8 (uint16x8_t __a
, uint8x8_t __b
)
1912 return (uint16x8_t
) __builtin_aarch64_usubwv8qi ((int16x8_t
) __a
,
/* vsubw_u16: calls __builtin_aarch64_usubwv4hi with __a cast to int32x4_t and
   casts the result to uint32x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x4_t) __b -- confirm.  */
1916 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1917 vsubw_u16 (uint32x4_t __a
, uint16x4_t __b
)
1919 return (uint32x4_t
) __builtin_aarch64_usubwv4hi ((int32x4_t
) __a
,
/* vsubw_u32: calls __builtin_aarch64_usubwv2si with __a cast to int64x2_t and
   casts the result to uint64x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x2_t) __b -- confirm.  */
1923 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1924 vsubw_u32 (uint64x2_t __a
, uint32x2_t __b
)
1926 return (uint64x2_t
) __builtin_aarch64_usubwv2si ((int64x2_t
) __a
,
/* vsubw_high_s8: passes the int16x8_t __a and the int8x16_t __b to
   __builtin_aarch64_ssubw2v16qi and returns the result cast to int16x8_t.  */
1930 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1931 vsubw_high_s8 (int16x8_t __a
, int8x16_t __b
)
1933 return (int16x8_t
) __builtin_aarch64_ssubw2v16qi (__a
, __b
);
/* vsubw_high_s16: passes the int32x4_t __a and the int16x8_t __b to
   __builtin_aarch64_ssubw2v8hi and returns the result cast to int32x4_t.  */
1936 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1937 vsubw_high_s16 (int32x4_t __a
, int16x8_t __b
)
1939 return (int32x4_t
) __builtin_aarch64_ssubw2v8hi (__a
, __b
);
/* vsubw_high_s32: passes the int64x2_t __a and the int32x4_t __b to
   __builtin_aarch64_ssubw2v4si and returns the result cast to int64x2_t.  */
1942 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1943 vsubw_high_s32 (int64x2_t __a
, int32x4_t __b
)
1945 return (int64x2_t
) __builtin_aarch64_ssubw2v4si (__a
, __b
);
/* vsubw_high_u8: calls __builtin_aarch64_usubw2v16qi with __a cast to
   int16x8_t and casts the result to uint16x8_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int8x16_t) __b -- confirm.  */
1948 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1949 vsubw_high_u8 (uint16x8_t __a
, uint8x16_t __b
)
1951 return (uint16x8_t
) __builtin_aarch64_usubw2v16qi ((int16x8_t
) __a
,
/* vsubw_high_u16: calls __builtin_aarch64_usubw2v8hi with __a cast to
   int32x4_t and casts the result to uint32x4_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int16x8_t) __b -- confirm.  */
1955 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1956 vsubw_high_u16 (uint32x4_t __a
, uint16x8_t __b
)
1958 return (uint32x4_t
) __builtin_aarch64_usubw2v8hi ((int32x4_t
) __a
,
/* vsubw_high_u32: calls __builtin_aarch64_usubw2v4si with __a cast to
   int64x2_t and casts the result to uint64x2_t.  NOTE(review): the rest of
   the argument list is cut off in this extract; presumably
   (int32x4_t) __b -- confirm.  */
1962 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1963 vsubw_high_u32 (uint64x2_t __a
, uint32x4_t __b
)
1965 return (uint64x2_t
) __builtin_aarch64_usubw2v4si ((int64x2_t
) __a
,
/* vqadd_s8: passes __a and __b to __builtin_aarch64_sqaddv8qi and returns
   the result cast to int8x8_t.  */
1969 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1970 vqadd_s8 (int8x8_t __a
, int8x8_t __b
)
1972 return (int8x8_t
) __builtin_aarch64_sqaddv8qi (__a
, __b
);
/* vqadd_s16: passes __a and __b to __builtin_aarch64_sqaddv4hi and returns
   the result cast to int16x4_t.  */
1975 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1976 vqadd_s16 (int16x4_t __a
, int16x4_t __b
)
1978 return (int16x4_t
) __builtin_aarch64_sqaddv4hi (__a
, __b
);
/* vqadd_s32: passes __a and __b to __builtin_aarch64_sqaddv2si and returns
   the result cast to int32x2_t.  */
1981 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1982 vqadd_s32 (int32x2_t __a
, int32x2_t __b
)
1984 return (int32x2_t
) __builtin_aarch64_sqaddv2si (__a
, __b
);
/* vqadd_s64: passes __a and __b to __builtin_aarch64_sqadddi and returns
   the result cast to int64x1_t.  */
1987 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1988 vqadd_s64 (int64x1_t __a
, int64x1_t __b
)
1990 return (int64x1_t
) __builtin_aarch64_sqadddi (__a
, __b
);
/* vqadd_u8: calls __builtin_aarch64_uqaddv8qi with __a cast to int8x8_t and
   casts the result to uint8x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x8_t) __b -- confirm.  */
1993 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1994 vqadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
1996 return (uint8x8_t
) __builtin_aarch64_uqaddv8qi ((int8x8_t
) __a
,
/* vqadd_u16: calls __builtin_aarch64_uqaddv4hi with __a cast to int16x4_t and
   casts the result to uint16x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x4_t) __b -- confirm.  */
2000 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
2001 vqadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
2003 return (uint16x4_t
) __builtin_aarch64_uqaddv4hi ((int16x4_t
) __a
,
/* vqadd_u32: calls __builtin_aarch64_uqaddv2si with __a cast to int32x2_t and
   casts the result to uint32x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x2_t) __b -- confirm.  */
2007 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
2008 vqadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
2010 return (uint32x2_t
) __builtin_aarch64_uqaddv2si ((int32x2_t
) __a
,
/* vqadd_u64: calls __builtin_aarch64_uqadddi with __a cast to int64x1_t and
   casts the result to uint64x1_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int64x1_t) __b -- confirm.  */
2014 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2015 vqadd_u64 (uint64x1_t __a
, uint64x1_t __b
)
2017 return (uint64x1_t
) __builtin_aarch64_uqadddi ((int64x1_t
) __a
,
/* vqaddq_s8: passes __a and __b to __builtin_aarch64_sqaddv16qi and returns
   the result cast to int8x16_t.  */
2021 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2022 vqaddq_s8 (int8x16_t __a
, int8x16_t __b
)
2024 return (int8x16_t
) __builtin_aarch64_sqaddv16qi (__a
, __b
);
/* vqaddq_s16: passes __a and __b to __builtin_aarch64_sqaddv8hi and returns
   the result cast to int16x8_t.  */
2027 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2028 vqaddq_s16 (int16x8_t __a
, int16x8_t __b
)
2030 return (int16x8_t
) __builtin_aarch64_sqaddv8hi (__a
, __b
);
/* vqaddq_s32: passes __a and __b to __builtin_aarch64_sqaddv4si and returns
   the result cast to int32x4_t.  */
2033 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2034 vqaddq_s32 (int32x4_t __a
, int32x4_t __b
)
2036 return (int32x4_t
) __builtin_aarch64_sqaddv4si (__a
, __b
);
/* vqaddq_s64: passes __a and __b to __builtin_aarch64_sqaddv2di and returns
   the result cast to int64x2_t.  */
2039 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2040 vqaddq_s64 (int64x2_t __a
, int64x2_t __b
)
2042 return (int64x2_t
) __builtin_aarch64_sqaddv2di (__a
, __b
);
/* vqaddq_u8: calls __builtin_aarch64_uqaddv16qi with __a cast to int8x16_t
   and casts the result to uint8x16_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x16_t) __b -- confirm.  */
2045 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
2046 vqaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
2048 return (uint8x16_t
) __builtin_aarch64_uqaddv16qi ((int8x16_t
) __a
,
/* vqaddq_u16: calls __builtin_aarch64_uqaddv8hi with __a cast to int16x8_t
   and casts the result to uint16x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x8_t) __b -- confirm.  */
2052 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
2053 vqaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
2055 return (uint16x8_t
) __builtin_aarch64_uqaddv8hi ((int16x8_t
) __a
,
/* vqaddq_u32: calls __builtin_aarch64_uqaddv4si with __a cast to int32x4_t
   and casts the result to uint32x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x4_t) __b -- confirm.  */
2059 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
2060 vqaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
2062 return (uint32x4_t
) __builtin_aarch64_uqaddv4si ((int32x4_t
) __a
,
/* vqaddq_u64: calls __builtin_aarch64_uqaddv2di with __a cast to int64x2_t
   and casts the result to uint64x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int64x2_t) __b -- confirm.  */
2066 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
2067 vqaddq_u64 (uint64x2_t __a
, uint64x2_t __b
)
2069 return (uint64x2_t
) __builtin_aarch64_uqaddv2di ((int64x2_t
) __a
,
/* vqsub_s8: passes __a and __b to __builtin_aarch64_sqsubv8qi and returns
   the result cast to int8x8_t.  */
2073 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2074 vqsub_s8 (int8x8_t __a
, int8x8_t __b
)
2076 return (int8x8_t
) __builtin_aarch64_sqsubv8qi (__a
, __b
);
/* vqsub_s16: passes __a and __b to __builtin_aarch64_sqsubv4hi and returns
   the result cast to int16x4_t.  */
2079 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2080 vqsub_s16 (int16x4_t __a
, int16x4_t __b
)
2082 return (int16x4_t
) __builtin_aarch64_sqsubv4hi (__a
, __b
);
/* vqsub_s32: passes __a and __b to __builtin_aarch64_sqsubv2si and returns
   the result cast to int32x2_t.  */
2085 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2086 vqsub_s32 (int32x2_t __a
, int32x2_t __b
)
2088 return (int32x2_t
) __builtin_aarch64_sqsubv2si (__a
, __b
);
/* vqsub_s64: passes __a and __b to __builtin_aarch64_sqsubdi and returns
   the result cast to int64x1_t.  */
2091 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2092 vqsub_s64 (int64x1_t __a
, int64x1_t __b
)
2094 return (int64x1_t
) __builtin_aarch64_sqsubdi (__a
, __b
);
/* vqsub_u8: calls __builtin_aarch64_uqsubv8qi with __a cast to int8x8_t and
   casts the result to uint8x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x8_t) __b -- confirm.  */
2097 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
2098 vqsub_u8 (uint8x8_t __a
, uint8x8_t __b
)
2100 return (uint8x8_t
) __builtin_aarch64_uqsubv8qi ((int8x8_t
) __a
,
/* vqsub_u16: calls __builtin_aarch64_uqsubv4hi with __a cast to int16x4_t and
   casts the result to uint16x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x4_t) __b -- confirm.  */
2104 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
2105 vqsub_u16 (uint16x4_t __a
, uint16x4_t __b
)
2107 return (uint16x4_t
) __builtin_aarch64_uqsubv4hi ((int16x4_t
) __a
,
/* vqsub_u32: calls __builtin_aarch64_uqsubv2si with __a cast to int32x2_t and
   casts the result to uint32x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x2_t) __b -- confirm.  */
2111 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
2112 vqsub_u32 (uint32x2_t __a
, uint32x2_t __b
)
2114 return (uint32x2_t
) __builtin_aarch64_uqsubv2si ((int32x2_t
) __a
,
/* vqsub_u64: calls __builtin_aarch64_uqsubdi with __a cast to int64x1_t and
   casts the result to uint64x1_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int64x1_t) __b -- confirm.  */
2118 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2119 vqsub_u64 (uint64x1_t __a
, uint64x1_t __b
)
2121 return (uint64x1_t
) __builtin_aarch64_uqsubdi ((int64x1_t
) __a
,
/* vqsubq_s8: passes __a and __b to __builtin_aarch64_sqsubv16qi and returns
   the result cast to int8x16_t.  */
2125 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2126 vqsubq_s8 (int8x16_t __a
, int8x16_t __b
)
2128 return (int8x16_t
) __builtin_aarch64_sqsubv16qi (__a
, __b
);
/* vqsubq_s16: passes __a and __b to __builtin_aarch64_sqsubv8hi and returns
   the result cast to int16x8_t.  */
2131 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2132 vqsubq_s16 (int16x8_t __a
, int16x8_t __b
)
2134 return (int16x8_t
) __builtin_aarch64_sqsubv8hi (__a
, __b
);
/* vqsubq_s32: passes __a and __b to __builtin_aarch64_sqsubv4si and returns
   the result cast to int32x4_t.  */
2137 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2138 vqsubq_s32 (int32x4_t __a
, int32x4_t __b
)
2140 return (int32x4_t
) __builtin_aarch64_sqsubv4si (__a
, __b
);
/* vqsubq_s64: passes __a and __b to __builtin_aarch64_sqsubv2di and returns
   the result cast to int64x2_t.  */
2143 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2144 vqsubq_s64 (int64x2_t __a
, int64x2_t __b
)
2146 return (int64x2_t
) __builtin_aarch64_sqsubv2di (__a
, __b
);
/* vqsubq_u8: calls __builtin_aarch64_uqsubv16qi with __a cast to int8x16_t
   and casts the result to uint8x16_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int8x16_t) __b -- confirm.  */
2149 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
2150 vqsubq_u8 (uint8x16_t __a
, uint8x16_t __b
)
2152 return (uint8x16_t
) __builtin_aarch64_uqsubv16qi ((int8x16_t
) __a
,
/* vqsubq_u16: calls __builtin_aarch64_uqsubv8hi with __a cast to int16x8_t
   and casts the result to uint16x8_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int16x8_t) __b -- confirm.  */
2156 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
2157 vqsubq_u16 (uint16x8_t __a
, uint16x8_t __b
)
2159 return (uint16x8_t
) __builtin_aarch64_uqsubv8hi ((int16x8_t
) __a
,
/* vqsubq_u32: calls __builtin_aarch64_uqsubv4si with __a cast to int32x4_t
   and casts the result to uint32x4_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int32x4_t) __b -- confirm.  */
2163 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
2164 vqsubq_u32 (uint32x4_t __a
, uint32x4_t __b
)
2166 return (uint32x4_t
) __builtin_aarch64_uqsubv4si ((int32x4_t
) __a
,
/* vqsubq_u64: calls __builtin_aarch64_uqsubv2di with __a cast to int64x2_t
   and casts the result to uint64x2_t.  NOTE(review): the rest of the argument
   list is cut off in this extract; presumably (int64x2_t) __b -- confirm.  */
2170 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
2171 vqsubq_u64 (uint64x2_t __a
, uint64x2_t __b
)
2173 return (uint64x2_t
) __builtin_aarch64_uqsubv2di ((int64x2_t
) __a
,
/* vqneg_s8: returns __builtin_aarch64_sqnegv8qi (__a) cast to int8x8_t.  */
2177 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2178 vqneg_s8 (int8x8_t __a
)
2180 return (int8x8_t
) __builtin_aarch64_sqnegv8qi (__a
);
/* vqneg_s16: returns __builtin_aarch64_sqnegv4hi (__a) cast to int16x4_t.  */
2183 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2184 vqneg_s16 (int16x4_t __a
)
2186 return (int16x4_t
) __builtin_aarch64_sqnegv4hi (__a
);
/* vqneg_s32: returns __builtin_aarch64_sqnegv2si (__a) cast to int32x2_t.  */
2189 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2190 vqneg_s32 (int32x2_t __a
)
2192 return (int32x2_t
) __builtin_aarch64_sqnegv2si (__a
);
/* vqnegq_s8: returns __builtin_aarch64_sqnegv16qi (__a) cast to int8x16_t.  */
2195 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2196 vqnegq_s8 (int8x16_t __a
)
2198 return (int8x16_t
) __builtin_aarch64_sqnegv16qi (__a
);
/* vqnegq_s16: returns __builtin_aarch64_sqnegv8hi (__a) cast to int16x8_t.  */
2201 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2202 vqnegq_s16 (int16x8_t __a
)
2204 return (int16x8_t
) __builtin_aarch64_sqnegv8hi (__a
);
/* vqnegq_s32: returns __builtin_aarch64_sqnegv4si (__a) cast to int32x4_t.  */
2207 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2208 vqnegq_s32 (int32x4_t __a
)
2210 return (int32x4_t
) __builtin_aarch64_sqnegv4si (__a
);
/* vqabs_s8: returns __builtin_aarch64_sqabsv8qi (__a) cast to int8x8_t.  */
2213 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2214 vqabs_s8 (int8x8_t __a
)
2216 return (int8x8_t
) __builtin_aarch64_sqabsv8qi (__a
);
/* vqabs_s16: returns __builtin_aarch64_sqabsv4hi (__a) cast to int16x4_t.  */
2219 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2220 vqabs_s16 (int16x4_t __a
)
2222 return (int16x4_t
) __builtin_aarch64_sqabsv4hi (__a
);
/* vqabs_s32: returns __builtin_aarch64_sqabsv2si (__a) cast to int32x2_t.  */
2225 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2226 vqabs_s32 (int32x2_t __a
)
2228 return (int32x2_t
) __builtin_aarch64_sqabsv2si (__a
);
/* vqabsq_s8: returns __builtin_aarch64_sqabsv16qi (__a) cast to int8x16_t.  */
2231 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2232 vqabsq_s8 (int8x16_t __a
)
2234 return (int8x16_t
) __builtin_aarch64_sqabsv16qi (__a
);
/* vqabsq_s16: returns __builtin_aarch64_sqabsv8hi (__a) cast to int16x8_t.  */
2237 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2238 vqabsq_s16 (int16x8_t __a
)
2240 return (int16x8_t
) __builtin_aarch64_sqabsv8hi (__a
);
/* vqabsq_s32: returns __builtin_aarch64_sqabsv4si (__a) cast to int32x4_t.  */
2243 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2244 vqabsq_s32 (int32x4_t __a
)
2246 return (int32x4_t
) __builtin_aarch64_sqabsv4si (__a
);
/* vqdmulh_s16: passes __a and __b to __builtin_aarch64_sqdmulhv4hi and
   returns the result cast to int16x4_t.  */
2249 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2250 vqdmulh_s16 (int16x4_t __a
, int16x4_t __b
)
2252 return (int16x4_t
) __builtin_aarch64_sqdmulhv4hi (__a
, __b
);
/* vqdmulh_s32: passes __a and __b to __builtin_aarch64_sqdmulhv2si and
   returns the result cast to int32x2_t.  */
2255 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2256 vqdmulh_s32 (int32x2_t __a
, int32x2_t __b
)
2258 return (int32x2_t
) __builtin_aarch64_sqdmulhv2si (__a
, __b
);
/* vqdmulhq_s16: passes __a and __b to __builtin_aarch64_sqdmulhv8hi and
   returns the result cast to int16x8_t.  */
2261 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2262 vqdmulhq_s16 (int16x8_t __a
, int16x8_t __b
)
2264 return (int16x8_t
) __builtin_aarch64_sqdmulhv8hi (__a
, __b
);
/* vqdmulhq_s32: passes __a and __b to __builtin_aarch64_sqdmulhv4si and
   returns the result cast to int32x4_t.  */
2267 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2268 vqdmulhq_s32 (int32x4_t __a
, int32x4_t __b
)
2270 return (int32x4_t
) __builtin_aarch64_sqdmulhv4si (__a
, __b
);
/* vqrdmulh_s16: passes __a and __b to __builtin_aarch64_sqrdmulhv4hi and
   returns the result cast to int16x4_t.  */
2273 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2274 vqrdmulh_s16 (int16x4_t __a
, int16x4_t __b
)
2276 return (int16x4_t
) __builtin_aarch64_sqrdmulhv4hi (__a
, __b
);
/* vqrdmulh_s32: passes __a and __b to __builtin_aarch64_sqrdmulhv2si and
   returns the result cast to int32x2_t.  */
2279 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2280 vqrdmulh_s32 (int32x2_t __a
, int32x2_t __b
)
2282 return (int32x2_t
) __builtin_aarch64_sqrdmulhv2si (__a
, __b
);
/* vqrdmulhq_s16: passes __a and __b to __builtin_aarch64_sqrdmulhv8hi and
   returns the result cast to int16x8_t.  */
2285 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2286 vqrdmulhq_s16 (int16x8_t __a
, int16x8_t __b
)
2288 return (int16x8_t
) __builtin_aarch64_sqrdmulhv8hi (__a
, __b
);
/* vqrdmulhq_s32: passes __a and __b to __builtin_aarch64_sqrdmulhv4si and
   returns the result cast to int32x4_t.  */
2291 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2292 vqrdmulhq_s32 (int32x4_t __a
, int32x4_t __b
)
2294 return (int32x4_t
) __builtin_aarch64_sqrdmulhv4si (__a
, __b
);
/* vcreate_s8: returns the uint64_t argument __a cast to int8x8_t.  */
2297 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2298 vcreate_s8 (uint64_t __a
)
2300 return (int8x8_t
) __a
;
/* vcreate_s16: returns the uint64_t argument __a cast to int16x4_t.  */
2303 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2304 vcreate_s16 (uint64_t __a
)
2306 return (int16x4_t
) __a
;
/* vcreate_s32: returns the uint64_t argument __a cast to int32x2_t.  */
2309 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2310 vcreate_s32 (uint64_t __a
)
2312 return (int32x2_t
) __a
;
/* vcreate_s64: returns the uint64_t argument __a cast to int64x1_t.  */
2315 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2316 vcreate_s64 (uint64_t __a
)
2318 return (int64x1_t
) __a
;
/* vcreate_f32: returns the uint64_t argument __a cast to float32x2_t.  */
2321 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2322 vcreate_f32 (uint64_t __a
)
2324 return (float32x2_t
) __a
;
/* vcreate_u8: returns the uint64_t argument __a cast to uint8x8_t.  */
2327 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
2328 vcreate_u8 (uint64_t __a
)
2330 return (uint8x8_t
) __a
;
/* vcreate_u16: returns the uint64_t argument __a cast to uint16x4_t.  */
2333 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
2334 vcreate_u16 (uint64_t __a
)
2336 return (uint16x4_t
) __a
;
/* vcreate_u32: returns the uint64_t argument __a cast to uint32x2_t.  */
2339 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
2340 vcreate_u32 (uint64_t __a
)
2342 return (uint32x2_t
) __a
;
/* vcreate_u64: returns the uint64_t argument __a cast to uint64x1_t.  */
2345 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2346 vcreate_u64 (uint64_t __a
)
2348 return (uint64x1_t
) __a
;
/* vcreate_f64: unlike the other vcreate_* variants visible here, this one
   goes through a builtin: it returns __builtin_aarch64_createdf (__a) cast
   to float64x1_t.  */
2351 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
2352 vcreate_f64 (uint64_t __a
)
2354 return (float64x1_t
) __builtin_aarch64_createdf (__a
);
/* vcreate_p8: returns the uint64_t argument __a cast to poly8x8_t.  */
2357 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2358 vcreate_p8 (uint64_t __a
)
2360 return (poly8x8_t
) __a
;
/* vcreate_p16: returns the uint64_t argument __a cast to poly16x4_t.  */
2363 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2364 vcreate_p16 (uint64_t __a
)
2366 return (poly16x4_t
) __a
;
/* vget_lane_f32: forwards the float32x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_f32 and
   returns its result as a float32_t.  */
2371 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
2372 vget_lane_f32 (float32x2_t __a
, const int __b
)
2374 return __aarch64_vget_lane_f32 (__a
, __b
);
/* vget_lane_f64: forwards the float64x1_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_f64 and
   returns its result as a float64_t.  */
2377 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
2378 vget_lane_f64 (float64x1_t __a
, const int __b
)
2380 return __aarch64_vget_lane_f64 (__a
, __b
);
/* vget_lane_p8: forwards the poly8x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_p8 and
   returns its result as a poly8_t.  */
2383 __extension__
static __inline poly8_t
__attribute__ ((__always_inline__
))
2384 vget_lane_p8 (poly8x8_t __a
, const int __b
)
2386 return __aarch64_vget_lane_p8 (__a
, __b
);
/* vget_lane_p16: forwards the poly16x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_p16 and
   returns its result as a poly16_t.  */
2389 __extension__
static __inline poly16_t
__attribute__ ((__always_inline__
))
2390 vget_lane_p16 (poly16x4_t __a
, const int __b
)
2392 return __aarch64_vget_lane_p16 (__a
, __b
);
/* vget_lane_s8: forwards the int8x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_s8 and
   returns its result as an int8_t.  */
2395 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
2396 vget_lane_s8 (int8x8_t __a
, const int __b
)
2398 return __aarch64_vget_lane_s8 (__a
, __b
);
/* vget_lane_s16: forwards the int16x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_s16 and
   returns its result as an int16_t.  */
2401 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
2402 vget_lane_s16 (int16x4_t __a
, const int __b
)
2404 return __aarch64_vget_lane_s16 (__a
, __b
);
/* vget_lane_s32: forwards the int32x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_s32 and
   returns its result as an int32_t.  */
2407 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
2408 vget_lane_s32 (int32x2_t __a
, const int __b
)
2410 return __aarch64_vget_lane_s32 (__a
, __b
);
/* vget_lane_s64: forwards the int64x1_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_s64 and
   returns its result as an int64_t.  */
2413 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
2414 vget_lane_s64 (int64x1_t __a
, const int __b
)
2416 return __aarch64_vget_lane_s64 (__a
, __b
);
/* vget_lane_u8: forwards the uint8x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_u8 and
   returns its result as a uint8_t.  */
2419 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
2420 vget_lane_u8 (uint8x8_t __a
, const int __b
)
2422 return __aarch64_vget_lane_u8 (__a
, __b
);
/* vget_lane_u16: forwards the uint16x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_u16 and
   returns its result as a uint16_t.  */
2425 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
2426 vget_lane_u16 (uint16x4_t __a
, const int __b
)
2428 return __aarch64_vget_lane_u16 (__a
, __b
);
/* vget_lane_u32: forwards the uint32x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_u32 and
   returns its result as a uint32_t.  */
2431 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
2432 vget_lane_u32 (uint32x2_t __a
, const int __b
)
2434 return __aarch64_vget_lane_u32 (__a
, __b
);
/* vget_lane_u64: forwards the uint64x1_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vget_lane_u64 and
   returns its result as a uint64_t.  */
2437 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
2438 vget_lane_u64 (uint64x1_t __a
, const int __b
)
2440 return __aarch64_vget_lane_u64 (__a
, __b
);
/* vgetq_lane_f32: forwards the float32x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_f32 and
   returns its result as a float32_t.  */
2445 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
2446 vgetq_lane_f32 (float32x4_t __a
, const int __b
)
2448 return __aarch64_vgetq_lane_f32 (__a
, __b
);
/* vgetq_lane_f64: forwards the float64x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_f64 and
   returns its result as a float64_t.  */
2451 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
2452 vgetq_lane_f64 (float64x2_t __a
, const int __b
)
2454 return __aarch64_vgetq_lane_f64 (__a
, __b
);
/* vgetq_lane_p8: forwards the poly8x16_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_p8 and
   returns its result as a poly8_t.  */
2457 __extension__
static __inline poly8_t
__attribute__ ((__always_inline__
))
2458 vgetq_lane_p8 (poly8x16_t __a
, const int __b
)
2460 return __aarch64_vgetq_lane_p8 (__a
, __b
);
/* vgetq_lane_p16: forwards the poly16x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_p16 and
   returns its result as a poly16_t.  */
2463 __extension__
static __inline poly16_t
__attribute__ ((__always_inline__
))
2464 vgetq_lane_p16 (poly16x8_t __a
, const int __b
)
2466 return __aarch64_vgetq_lane_p16 (__a
, __b
);
/* vgetq_lane_s8: forwards the int8x16_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_s8 and
   returns its result as an int8_t.  */
2469 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
2470 vgetq_lane_s8 (int8x16_t __a
, const int __b
)
2472 return __aarch64_vgetq_lane_s8 (__a
, __b
);
/* vgetq_lane_s16: forwards the int16x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_s16 and
   returns its result as an int16_t.  */
2475 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
2476 vgetq_lane_s16 (int16x8_t __a
, const int __b
)
2478 return __aarch64_vgetq_lane_s16 (__a
, __b
);
/* vgetq_lane_s32: forwards the int32x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_s32 and
   returns its result as an int32_t.  */
2481 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
2482 vgetq_lane_s32 (int32x4_t __a
, const int __b
)
2484 return __aarch64_vgetq_lane_s32 (__a
, __b
);
/* vgetq_lane_s64: forwards the int64x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_s64 and
   returns its result as an int64_t.  */
2487 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
2488 vgetq_lane_s64 (int64x2_t __a
, const int __b
)
2490 return __aarch64_vgetq_lane_s64 (__a
, __b
);
/* vgetq_lane_u8: forwards the uint8x16_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_u8 and
   returns its result as a uint8_t.  */
2493 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
2494 vgetq_lane_u8 (uint8x16_t __a
, const int __b
)
2496 return __aarch64_vgetq_lane_u8 (__a
, __b
);
/* vgetq_lane_u16: forwards the uint16x8_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_u16 and
   returns its result as a uint16_t.  */
2499 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
2500 vgetq_lane_u16 (uint16x8_t __a
, const int __b
)
2502 return __aarch64_vgetq_lane_u16 (__a
, __b
);
/* vgetq_lane_u32: forwards the uint32x4_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_u32 and
   returns its result as a uint32_t.  */
2505 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
2506 vgetq_lane_u32 (uint32x4_t __a
, const int __b
)
2508 return __aarch64_vgetq_lane_u32 (__a
, __b
);
/* vgetq_lane_u64: forwards the uint64x2_t __a and the const int __b
   (presumably the lane index -- confirm) to __aarch64_vgetq_lane_u64 and
   returns its result as a uint64_t.  */
2511 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
2512 vgetq_lane_u64 (uint64x2_t __a
, const int __b
)
2514 return __aarch64_vgetq_lane_u64 (__a
, __b
);
/* vreinterpret_p8_s8: returns __builtin_aarch64_reinterpretv8qiv8qi (__a)
   cast to poly8x8_t.  */
2519 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2520 vreinterpret_p8_s8 (int8x8_t __a
)
2522 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi (__a
);
/* vreinterpret_p8_s16: returns __builtin_aarch64_reinterpretv8qiv4hi (__a)
   cast to poly8x8_t.  */
2525 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2526 vreinterpret_p8_s16 (int16x4_t __a
)
2528 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
/* vreinterpret_p8_s32: returns __builtin_aarch64_reinterpretv8qiv2si (__a)
   cast to poly8x8_t.  */
2531 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2532 vreinterpret_p8_s32 (int32x2_t __a
)
2534 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
/* vreinterpret_p8_s64: returns __builtin_aarch64_reinterpretv8qidi (__a)
   cast to poly8x8_t.  */
2537 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2538 vreinterpret_p8_s64 (int64x1_t __a
)
2540 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
/* vreinterpret_p8_f32: returns __builtin_aarch64_reinterpretv8qiv2sf (__a)
   cast to poly8x8_t.  */
2543 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2544 vreinterpret_p8_f32 (float32x2_t __a
)
2546 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
/* vreinterpret_p8_u8: casts __a to int8x8_t, passes it to
   __builtin_aarch64_reinterpretv8qiv8qi and returns the result cast to
   poly8x8_t.  */
2549 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2550 vreinterpret_p8_u8 (uint8x8_t __a
)
2552 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
/* vreinterpret_p8_u16: casts __a to int16x4_t, passes it to
   __builtin_aarch64_reinterpretv8qiv4hi and returns the result cast to
   poly8x8_t.  */
2555 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2556 vreinterpret_p8_u16 (uint16x4_t __a
)
2558 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
/* vreinterpret_p8_u32: casts __a to int32x2_t, passes it to
   __builtin_aarch64_reinterpretv8qiv2si and returns the result cast to
   poly8x8_t.  */
2561 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2562 vreinterpret_p8_u32 (uint32x2_t __a
)
2564 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
2567 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2568 vreinterpret_p8_u64 (uint64x1_t __a
)
2570 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
2573 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2574 vreinterpret_p8_p16 (poly16x4_t __a
)
2576 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
2579 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2580 vreinterpretq_p8_s8 (int8x16_t __a
)
2582 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi (__a
);
2585 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2586 vreinterpretq_p8_s16 (int16x8_t __a
)
2588 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
2591 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2592 vreinterpretq_p8_s32 (int32x4_t __a
)
2594 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
2597 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2598 vreinterpretq_p8_s64 (int64x2_t __a
)
2600 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
2603 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2604 vreinterpretq_p8_f32 (float32x4_t __a
)
2606 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
2609 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2610 vreinterpretq_p8_u8 (uint8x16_t __a
)
2612 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
2616 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2617 vreinterpretq_p8_u16 (uint16x8_t __a
)
2619 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
2623 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2624 vreinterpretq_p8_u32 (uint32x4_t __a
)
2626 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
)
2630 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2631 vreinterpretq_p8_u64 (uint64x2_t __a
)
2633 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
)
2637 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2638 vreinterpretq_p8_p16 (poly16x8_t __a
)
2640 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
2644 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2645 vreinterpret_p16_s8 (int8x8_t __a
)
2647 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
2650 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2651 vreinterpret_p16_s16 (int16x4_t __a
)
2653 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi (__a
);
2656 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2657 vreinterpret_p16_s32 (int32x2_t __a
)
2659 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
2662 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2663 vreinterpret_p16_s64 (int64x1_t __a
)
2665 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
2668 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2669 vreinterpret_p16_f32 (float32x2_t __a
)
2671 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
2674 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2675 vreinterpret_p16_u8 (uint8x8_t __a
)
2677 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
2680 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2681 vreinterpret_p16_u16 (uint16x4_t __a
)
2683 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
2686 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2687 vreinterpret_p16_u32 (uint32x2_t __a
)
2689 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
2692 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2693 vreinterpret_p16_u64 (uint64x1_t __a
)
2695 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
2698 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2699 vreinterpret_p16_p8 (poly8x8_t __a
)
2701 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
2704 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2705 vreinterpretq_p16_s8 (int8x16_t __a
)
2707 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
2710 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2711 vreinterpretq_p16_s16 (int16x8_t __a
)
2713 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi (__a
);
2716 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2717 vreinterpretq_p16_s32 (int32x4_t __a
)
2719 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
2722 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2723 vreinterpretq_p16_s64 (int64x2_t __a
)
2725 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
2728 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2729 vreinterpretq_p16_f32 (float32x4_t __a
)
2731 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
2734 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2735 vreinterpretq_p16_u8 (uint8x16_t __a
)
2737 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
2741 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2742 vreinterpretq_p16_u16 (uint16x8_t __a
)
2744 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
2747 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2748 vreinterpretq_p16_u32 (uint32x4_t __a
)
2750 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
2753 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2754 vreinterpretq_p16_u64 (uint64x2_t __a
)
2756 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
2759 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2760 vreinterpretq_p16_p8 (poly8x16_t __a
)
2762 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
2766 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2767 vreinterpret_f32_s8 (int8x8_t __a
)
2769 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi (__a
);
2772 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2773 vreinterpret_f32_s16 (int16x4_t __a
)
2775 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi (__a
);
2778 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2779 vreinterpret_f32_s32 (int32x2_t __a
)
2781 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv2si (__a
);
2784 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2785 vreinterpret_f32_s64 (int64x1_t __a
)
2787 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfdi (__a
);
2790 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2791 vreinterpret_f32_u8 (uint8x8_t __a
)
2793 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t
) __a
);
2796 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2797 vreinterpret_f32_u16 (uint16x4_t __a
)
2799 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t
)
2803 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2804 vreinterpret_f32_u32 (uint32x2_t __a
)
2806 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t
)
2810 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2811 vreinterpret_f32_u64 (uint64x1_t __a
)
2813 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t
) __a
);
2816 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2817 vreinterpret_f32_p8 (poly8x8_t __a
)
2819 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t
) __a
);
2822 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2823 vreinterpret_f32_p16 (poly16x4_t __a
)
2825 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t
)
2829 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2830 vreinterpretq_f32_s8 (int8x16_t __a
)
2832 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi (__a
);
2835 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2836 vreinterpretq_f32_s16 (int16x8_t __a
)
2838 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi (__a
);
2841 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2842 vreinterpretq_f32_s32 (int32x4_t __a
)
2844 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv4si (__a
);
2847 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2848 vreinterpretq_f32_s64 (int64x2_t __a
)
2850 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv2di (__a
);
2853 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2854 vreinterpretq_f32_u8 (uint8x16_t __a
)
2856 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t
)
2860 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2861 vreinterpretq_f32_u16 (uint16x8_t __a
)
2863 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t
)
2867 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2868 vreinterpretq_f32_u32 (uint32x4_t __a
)
2870 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t
)
2874 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2875 vreinterpretq_f32_u64 (uint64x2_t __a
)
2877 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t
)
2881 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2882 vreinterpretq_f32_p8 (poly8x16_t __a
)
2884 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t
)
2888 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2889 vreinterpretq_f32_p16 (poly16x8_t __a
)
2891 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t
)
2895 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2896 vreinterpret_s64_s8 (int8x8_t __a
)
2898 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi (__a
);
2901 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2902 vreinterpret_s64_s16 (int16x4_t __a
)
2904 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi (__a
);
2907 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2908 vreinterpret_s64_s32 (int32x2_t __a
)
2910 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2si (__a
);
2913 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2914 vreinterpret_s64_f32 (float32x2_t __a
)
2916 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2sf (__a
);
2919 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2920 vreinterpret_s64_u8 (uint8x8_t __a
)
2922 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
2925 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2926 vreinterpret_s64_u16 (uint16x4_t __a
)
2928 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
2931 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2932 vreinterpret_s64_u32 (uint32x2_t __a
)
2934 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2si ((int32x2_t
) __a
);
2937 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2938 vreinterpret_s64_u64 (uint64x1_t __a
)
2940 return (int64x1_t
) __builtin_aarch64_reinterpretdidi ((int64x1_t
) __a
);
2943 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2944 vreinterpret_s64_p8 (poly8x8_t __a
)
2946 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
2949 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2950 vreinterpret_s64_p16 (poly16x4_t __a
)
2952 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
2955 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2956 vreinterpretq_s64_s8 (int8x16_t __a
)
2958 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi (__a
);
2961 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2962 vreinterpretq_s64_s16 (int16x8_t __a
)
2964 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi (__a
);
2967 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2968 vreinterpretq_s64_s32 (int32x4_t __a
)
2970 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4si (__a
);
2973 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2974 vreinterpretq_s64_f32 (float32x4_t __a
)
2976 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4sf (__a
);
2979 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2980 vreinterpretq_s64_u8 (uint8x16_t __a
)
2982 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
) __a
);
2985 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2986 vreinterpretq_s64_u16 (uint16x8_t __a
)
2988 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
2991 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2992 vreinterpretq_s64_u32 (uint32x4_t __a
)
2994 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4si ((int32x4_t
) __a
);
2997 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2998 vreinterpretq_s64_u64 (uint64x2_t __a
)
3000 return (int64x2_t
) __builtin_aarch64_reinterpretv2div2di ((int64x2_t
) __a
);
3003 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
3004 vreinterpretq_s64_p8 (poly8x16_t __a
)
3006 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
) __a
);
3009 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
3010 vreinterpretq_s64_p16 (poly16x8_t __a
)
3012 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
3015 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3016 vreinterpret_u64_s8 (int8x8_t __a
)
3018 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi (__a
);
3021 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3022 vreinterpret_u64_s16 (int16x4_t __a
)
3024 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi (__a
);
3027 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3028 vreinterpret_u64_s32 (int32x2_t __a
)
3030 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2si (__a
);
3033 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3034 vreinterpret_u64_s64 (int64x1_t __a
)
3036 return (uint64x1_t
) __builtin_aarch64_reinterpretdidi (__a
);
3039 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3040 vreinterpret_u64_f32 (float32x2_t __a
)
3042 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2sf (__a
);
3045 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3046 vreinterpret_u64_u8 (uint8x8_t __a
)
3048 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
3051 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3052 vreinterpret_u64_u16 (uint16x4_t __a
)
3054 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
3057 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3058 vreinterpret_u64_u32 (uint32x2_t __a
)
3060 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2si ((int32x2_t
) __a
);
3063 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3064 vreinterpret_u64_p8 (poly8x8_t __a
)
3066 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
3069 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3070 vreinterpret_u64_p16 (poly16x4_t __a
)
3072 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
3075 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3076 vreinterpretq_u64_s8 (int8x16_t __a
)
3078 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi (__a
);
3081 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3082 vreinterpretq_u64_s16 (int16x8_t __a
)
3084 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi (__a
);
3087 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3088 vreinterpretq_u64_s32 (int32x4_t __a
)
3090 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4si (__a
);
3093 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3094 vreinterpretq_u64_s64 (int64x2_t __a
)
3096 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div2di (__a
);
3099 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3100 vreinterpretq_u64_f32 (float32x4_t __a
)
3102 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4sf (__a
);
3105 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3106 vreinterpretq_u64_u8 (uint8x16_t __a
)
3108 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
)
3112 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3113 vreinterpretq_u64_u16 (uint16x8_t __a
)
3115 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
3118 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3119 vreinterpretq_u64_u32 (uint32x4_t __a
)
3121 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4si ((int32x4_t
) __a
);
3124 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3125 vreinterpretq_u64_p8 (poly8x16_t __a
)
3127 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
)
3131 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3132 vreinterpretq_u64_p16 (poly16x8_t __a
)
3134 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
3137 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3138 vreinterpret_s8_s16 (int16x4_t __a
)
3140 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
3143 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3144 vreinterpret_s8_s32 (int32x2_t __a
)
3146 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
3149 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3150 vreinterpret_s8_s64 (int64x1_t __a
)
3152 return (int8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
3155 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3156 vreinterpret_s8_f32 (float32x2_t __a
)
3158 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
3161 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3162 vreinterpret_s8_u8 (uint8x8_t __a
)
3164 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3167 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3168 vreinterpret_s8_u16 (uint16x4_t __a
)
3170 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3173 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3174 vreinterpret_s8_u32 (uint32x2_t __a
)
3176 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
3179 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3180 vreinterpret_s8_u64 (uint64x1_t __a
)
3182 return (int8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
3185 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3186 vreinterpret_s8_p8 (poly8x8_t __a
)
3188 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3191 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3192 vreinterpret_s8_p16 (poly16x4_t __a
)
3194 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3197 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3198 vreinterpretq_s8_s16 (int16x8_t __a
)
3200 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
3203 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3204 vreinterpretq_s8_s32 (int32x4_t __a
)
3206 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
3209 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3210 vreinterpretq_s8_s64 (int64x2_t __a
)
3212 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
3215 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3216 vreinterpretq_s8_f32 (float32x4_t __a
)
3218 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
3221 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3222 vreinterpretq_s8_u8 (uint8x16_t __a
)
3224 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3228 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3229 vreinterpretq_s8_u16 (uint16x8_t __a
)
3231 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
) __a
);
3234 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3235 vreinterpretq_s8_u32 (uint32x4_t __a
)
3237 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
) __a
);
3240 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3241 vreinterpretq_s8_u64 (uint64x2_t __a
)
3243 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
) __a
);
3246 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3247 vreinterpretq_s8_p8 (poly8x16_t __a
)
3249 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3253 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3254 vreinterpretq_s8_p16 (poly16x8_t __a
)
3256 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
) __a
);
3259 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3260 vreinterpret_s16_s8 (int8x8_t __a
)
3262 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
3265 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3266 vreinterpret_s16_s32 (int32x2_t __a
)
3268 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
3271 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3272 vreinterpret_s16_s64 (int64x1_t __a
)
3274 return (int16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
3277 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3278 vreinterpret_s16_f32 (float32x2_t __a
)
3280 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
3283 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3284 vreinterpret_s16_u8 (uint8x8_t __a
)
3286 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3289 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3290 vreinterpret_s16_u16 (uint16x4_t __a
)
3292 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3295 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3296 vreinterpret_s16_u32 (uint32x2_t __a
)
3298 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
3301 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3302 vreinterpret_s16_u64 (uint64x1_t __a
)
3304 return (int16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
3307 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3308 vreinterpret_s16_p8 (poly8x8_t __a
)
3310 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3313 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3314 vreinterpret_s16_p16 (poly16x4_t __a
)
3316 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3319 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3320 vreinterpretq_s16_s8 (int8x16_t __a
)
3322 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
3325 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3326 vreinterpretq_s16_s32 (int32x4_t __a
)
3328 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
3331 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3332 vreinterpretq_s16_s64 (int64x2_t __a
)
3334 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
3337 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3338 vreinterpretq_s16_f32 (float32x4_t __a
)
3340 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
3343 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3344 vreinterpretq_s16_u8 (uint8x16_t __a
)
3346 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
) __a
);
3349 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3350 vreinterpretq_s16_u16 (uint16x8_t __a
)
3352 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3355 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3356 vreinterpretq_s16_u32 (uint32x4_t __a
)
3358 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
3361 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3362 vreinterpretq_s16_u64 (uint64x2_t __a
)
3364 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
3367 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3368 vreinterpretq_s16_p8 (poly8x16_t __a
)
3370 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
) __a
);
3373 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3374 vreinterpretq_s16_p16 (poly16x8_t __a
)
3376 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3379 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3380 vreinterpret_s32_s8 (int8x8_t __a
)
3382 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi (__a
);
3385 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3386 vreinterpret_s32_s16 (int16x4_t __a
)
3388 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi (__a
);
3391 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3392 vreinterpret_s32_s64 (int64x1_t __a
)
3394 return (int32x2_t
) __builtin_aarch64_reinterpretv2sidi (__a
);
3397 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3398 vreinterpret_s32_f32 (float32x2_t __a
)
3400 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv2sf (__a
);
3403 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3404 vreinterpret_s32_u8 (uint8x8_t __a
)
3406 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3409 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3410 vreinterpret_s32_u16 (uint16x4_t __a
)
3412 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3415 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3416 vreinterpret_s32_u32 (uint32x2_t __a
)
3418 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t
) __a
);
3421 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3422 vreinterpret_s32_u64 (uint64x1_t __a
)
3424 return (int32x2_t
) __builtin_aarch64_reinterpretv2sidi ((int64x1_t
) __a
);
3427 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3428 vreinterpret_s32_p8 (poly8x8_t __a
)
3430 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3433 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3434 vreinterpret_s32_p16 (poly16x4_t __a
)
3436 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3439 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3440 vreinterpretq_s32_s8 (int8x16_t __a
)
3442 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi (__a
);
3445 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3446 vreinterpretq_s32_s16 (int16x8_t __a
)
3448 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi (__a
);
3451 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3452 vreinterpretq_s32_s64 (int64x2_t __a
)
3454 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv2di (__a
);
3457 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3458 vreinterpretq_s32_f32 (float32x4_t __a
)
3460 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv4sf (__a
);
3463 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3464 vreinterpretq_s32_u8 (uint8x16_t __a
)
3466 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
) __a
);
3469 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3470 vreinterpretq_s32_u16 (uint16x8_t __a
)
3472 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3475 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3476 vreinterpretq_s32_u32 (uint32x4_t __a
)
3478 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t
) __a
);
3481 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3482 vreinterpretq_s32_u64 (uint64x2_t __a
)
3484 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t
) __a
);
3487 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3488 vreinterpretq_s32_p8 (poly8x16_t __a
)
3490 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
) __a
);
3493 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3494 vreinterpretq_s32_p16 (poly16x8_t __a
)
3496 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3499 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3500 vreinterpret_u8_s8 (int8x8_t __a
)
3502 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi (__a
);
3505 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3506 vreinterpret_u8_s16 (int16x4_t __a
)
3508 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
3511 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3512 vreinterpret_u8_s32 (int32x2_t __a
)
3514 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
3517 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3518 vreinterpret_u8_s64 (int64x1_t __a
)
3520 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
3523 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3524 vreinterpret_u8_f32 (float32x2_t __a
)
3526 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
3529 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3530 vreinterpret_u8_u16 (uint16x4_t __a
)
3532 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3535 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3536 vreinterpret_u8_u32 (uint32x2_t __a
)
3538 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
3541 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3542 vreinterpret_u8_u64 (uint64x1_t __a
)
3544 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
3547 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3548 vreinterpret_u8_p8 (poly8x8_t __a
)
3550 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3553 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3554 vreinterpret_u8_p16 (poly16x4_t __a
)
3556 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3559 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3560 vreinterpretq_u8_s8 (int8x16_t __a
)
3562 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi (__a
);
3565 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3566 vreinterpretq_u8_s16 (int16x8_t __a
)
3568 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
3571 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3572 vreinterpretq_u8_s32 (int32x4_t __a
)
3574 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
3577 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3578 vreinterpretq_u8_s64 (int64x2_t __a
)
3580 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
3583 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3584 vreinterpretq_u8_f32 (float32x4_t __a
)
3586 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
3589 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3590 vreinterpretq_u8_u16 (uint16x8_t __a
)
3592 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
3596 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3597 vreinterpretq_u8_u32 (uint32x4_t __a
)
3599 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
)
3603 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3604 vreinterpretq_u8_u64 (uint64x2_t __a
)
3606 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
)
3610 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3611 vreinterpretq_u8_p8 (poly8x16_t __a
)
3613 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3617 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3618 vreinterpretq_u8_p16 (poly16x8_t __a
)
3620 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
3624 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3625 vreinterpret_u16_s8 (int8x8_t __a
)
3627 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
3630 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3631 vreinterpret_u16_s16 (int16x4_t __a
)
3633 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi (__a
);
3636 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3637 vreinterpret_u16_s32 (int32x2_t __a
)
3639 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
3642 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3643 vreinterpret_u16_s64 (int64x1_t __a
)
3645 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
3648 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3649 vreinterpret_u16_f32 (float32x2_t __a
)
3651 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
3654 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3655 vreinterpret_u16_u8 (uint8x8_t __a
)
3657 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3660 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3661 vreinterpret_u16_u32 (uint32x2_t __a
)
3663 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
3666 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3667 vreinterpret_u16_u64 (uint64x1_t __a
)
3669 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
3672 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3673 vreinterpret_u16_p8 (poly8x8_t __a
)
3675 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3678 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3679 vreinterpret_u16_p16 (poly16x4_t __a
)
3681 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3684 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3685 vreinterpretq_u16_s8 (int8x16_t __a
)
3687 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
3690 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3691 vreinterpretq_u16_s16 (int16x8_t __a
)
3693 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi (__a
);
3696 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3697 vreinterpretq_u16_s32 (int32x4_t __a
)
3699 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
3702 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3703 vreinterpretq_u16_s64 (int64x2_t __a
)
3705 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
3708 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3709 vreinterpretq_u16_f32 (float32x4_t __a
)
3711 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
3714 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3715 vreinterpretq_u16_u8 (uint8x16_t __a
)
3717 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
3721 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3722 vreinterpretq_u16_u32 (uint32x4_t __a
)
3724 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
3727 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3728 vreinterpretq_u16_u64 (uint64x2_t __a
)
3730 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
3733 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3734 vreinterpretq_u16_p8 (poly8x16_t __a
)
3736 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
3740 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3741 vreinterpretq_u16_p16 (poly16x8_t __a
)
3743 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3746 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3747 vreinterpret_u32_s8 (int8x8_t __a
)
3749 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi (__a
);
3752 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3753 vreinterpret_u32_s16 (int16x4_t __a
)
3755 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi (__a
);
3758 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3759 vreinterpret_u32_s32 (int32x2_t __a
)
3761 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv2si (__a
);
3764 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3765 vreinterpret_u32_s64 (int64x1_t __a
)
3767 return (uint32x2_t
) __builtin_aarch64_reinterpretv2sidi (__a
);
3770 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3771 vreinterpret_u32_f32 (float32x2_t __a
)
3773 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv2sf (__a
);
3776 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3777 vreinterpret_u32_u8 (uint8x8_t __a
)
3779 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3782 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3783 vreinterpret_u32_u16 (uint16x4_t __a
)
3785 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3788 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3789 vreinterpret_u32_u64 (uint64x1_t __a
)
3791 return (uint32x2_t
) __builtin_aarch64_reinterpretv2sidi ((int64x1_t
) __a
);
3794 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3795 vreinterpret_u32_p8 (poly8x8_t __a
)
3797 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3800 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3801 vreinterpret_u32_p16 (poly16x4_t __a
)
3803 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3806 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3807 vreinterpretq_u32_s8 (int8x16_t __a
)
3809 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi (__a
);
3812 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3813 vreinterpretq_u32_s16 (int16x8_t __a
)
3815 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi (__a
);
3818 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3819 vreinterpretq_u32_s32 (int32x4_t __a
)
3821 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv4si (__a
);
3824 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3825 vreinterpretq_u32_s64 (int64x2_t __a
)
3827 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv2di (__a
);
3830 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3831 vreinterpretq_u32_f32 (float32x4_t __a
)
3833 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv4sf (__a
);
3836 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3837 vreinterpretq_u32_u8 (uint8x16_t __a
)
3839 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
)
3843 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3844 vreinterpretq_u32_u16 (uint16x8_t __a
)
3846 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3849 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3850 vreinterpretq_u32_u64 (uint64x2_t __a
)
3852 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t
) __a
);
3855 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3856 vreinterpretq_u32_p8 (poly8x16_t __a
)
3858 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
)
3862 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3863 vreinterpretq_u32_p16 (poly16x8_t __a
)
3865 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3868 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3869 vcombine_s8 (int8x8_t __a
, int8x8_t __b
)
3871 return (int8x16_t
) __builtin_aarch64_combinev8qi (__a
, __b
);
3874 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3875 vcombine_s16 (int16x4_t __a
, int16x4_t __b
)
3877 return (int16x8_t
) __builtin_aarch64_combinev4hi (__a
, __b
);
3880 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3881 vcombine_s32 (int32x2_t __a
, int32x2_t __b
)
3883 return (int32x4_t
) __builtin_aarch64_combinev2si (__a
, __b
);
3886 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
3887 vcombine_s64 (int64x1_t __a
, int64x1_t __b
)
3889 return (int64x2_t
) __builtin_aarch64_combinedi (__a
, __b
);
3892 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
3893 vcombine_f32 (float32x2_t __a
, float32x2_t __b
)
3895 return (float32x4_t
) __builtin_aarch64_combinev2sf (__a
, __b
);
3898 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3899 vcombine_u8 (uint8x8_t __a
, uint8x8_t __b
)
3901 return (uint8x16_t
) __builtin_aarch64_combinev8qi ((int8x8_t
) __a
,
3905 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3906 vcombine_u16 (uint16x4_t __a
, uint16x4_t __b
)
3908 return (uint16x8_t
) __builtin_aarch64_combinev4hi ((int16x4_t
) __a
,
3912 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3913 vcombine_u32 (uint32x2_t __a
, uint32x2_t __b
)
3915 return (uint32x4_t
) __builtin_aarch64_combinev2si ((int32x2_t
) __a
,
3919 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3920 vcombine_u64 (uint64x1_t __a
, uint64x1_t __b
)
3922 return (uint64x2_t
) __builtin_aarch64_combinedi ((int64x1_t
) __a
,
3926 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
3927 vcombine_f64 (float64x1_t __a
, float64x1_t __b
)
3929 return (float64x2_t
) __builtin_aarch64_combinedf (__a
, __b
);
3932 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
3933 vcombine_p8 (poly8x8_t __a
, poly8x8_t __b
)
3935 return (poly8x16_t
) __builtin_aarch64_combinev8qi ((int8x8_t
) __a
,
3939 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
3940 vcombine_p16 (poly16x4_t __a
, poly16x4_t __b
)
3942 return (poly16x8_t
) __builtin_aarch64_combinev4hi ((int16x4_t
) __a
,
/* Start of temporary inline asm implementations.  */
3948 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3949 vaba_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
3952 __asm__ ("saba %0.8b,%2.8b,%3.8b"
3954 : "0"(a
), "w"(b
), "w"(c
)
3955 : /* No clobbers */);
3959 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3960 vaba_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
3963 __asm__ ("saba %0.4h,%2.4h,%3.4h"
3965 : "0"(a
), "w"(b
), "w"(c
)
3966 : /* No clobbers */);
3970 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3971 vaba_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
3974 __asm__ ("saba %0.2s,%2.2s,%3.2s"
3976 : "0"(a
), "w"(b
), "w"(c
)
3977 : /* No clobbers */);
3981 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3982 vaba_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
3985 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3987 : "0"(a
), "w"(b
), "w"(c
)
3988 : /* No clobbers */);
3992 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3993 vaba_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
3996 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3998 : "0"(a
), "w"(b
), "w"(c
)
3999 : /* No clobbers */);
4003 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
4004 vaba_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
4007 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4009 : "0"(a
), "w"(b
), "w"(c
)
4010 : /* No clobbers */);
4014 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4015 vabal_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
4018 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4020 : "0"(a
), "w"(b
), "w"(c
)
4021 : /* No clobbers */);
4025 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4026 vabal_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
4029 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4031 : "0"(a
), "w"(b
), "w"(c
)
4032 : /* No clobbers */);
4036 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4037 vabal_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
4040 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4042 : "0"(a
), "w"(b
), "w"(c
)
4043 : /* No clobbers */);
4047 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4048 vabal_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
4051 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4053 : "0"(a
), "w"(b
), "w"(c
)
4054 : /* No clobbers */);
4058 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4059 vabal_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
4062 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4064 : "0"(a
), "w"(b
), "w"(c
)
4065 : /* No clobbers */);
4069 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4070 vabal_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
4073 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4075 : "0"(a
), "w"(b
), "w"(c
)
4076 : /* No clobbers */);
4080 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4081 vabal_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
4084 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4086 : "0"(a
), "w"(b
), "w"(c
)
4087 : /* No clobbers */);
4091 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4092 vabal_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
4095 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4097 : "0"(a
), "w"(b
), "w"(c
)
4098 : /* No clobbers */);
4102 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4103 vabal_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
4106 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4108 : "0"(a
), "w"(b
), "w"(c
)
4109 : /* No clobbers */);
4113 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4114 vabal_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
4117 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4119 : "0"(a
), "w"(b
), "w"(c
)
4120 : /* No clobbers */);
4124 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4125 vabal_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
4128 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4130 : "0"(a
), "w"(b
), "w"(c
)
4131 : /* No clobbers */);
4135 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4136 vabal_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
4139 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4141 : "0"(a
), "w"(b
), "w"(c
)
4142 : /* No clobbers */);
4146 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4147 vabaq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
4150 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4152 : "0"(a
), "w"(b
), "w"(c
)
4153 : /* No clobbers */);
4157 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4158 vabaq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
4161 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4163 : "0"(a
), "w"(b
), "w"(c
)
4164 : /* No clobbers */);
4168 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4169 vabaq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
4172 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4174 : "0"(a
), "w"(b
), "w"(c
)
4175 : /* No clobbers */);
4179 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
4180 vabaq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
4183 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4185 : "0"(a
), "w"(b
), "w"(c
)
4186 : /* No clobbers */);
4190 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4191 vabaq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
4194 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4196 : "0"(a
), "w"(b
), "w"(c
)
4197 : /* No clobbers */);
4201 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4202 vabaq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
4205 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4207 : "0"(a
), "w"(b
), "w"(c
)
4208 : /* No clobbers */);
4212 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
4213 vabd_f32 (float32x2_t a
, float32x2_t b
)
4216 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4219 : /* No clobbers */);
4223 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4224 vabd_s8 (int8x8_t a
, int8x8_t b
)
4227 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4230 : /* No clobbers */);
4234 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4235 vabd_s16 (int16x4_t a
, int16x4_t b
)
4238 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4241 : /* No clobbers */);
4245 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4246 vabd_s32 (int32x2_t a
, int32x2_t b
)
4249 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4252 : /* No clobbers */);
4256 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
4257 vabd_u8 (uint8x8_t a
, uint8x8_t b
)
4260 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4263 : /* No clobbers */);
4267 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
4268 vabd_u16 (uint16x4_t a
, uint16x4_t b
)
4271 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4274 : /* No clobbers */);
4278 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
4279 vabd_u32 (uint32x2_t a
, uint32x2_t b
)
4282 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4285 : /* No clobbers */);
4289 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
4290 vabdd_f64 (float64_t a
, float64_t b
)
4293 __asm__ ("fabd %d0, %d1, %d2"
4296 : /* No clobbers */);
4300 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4301 vabdl_high_s8 (int8x16_t a
, int8x16_t b
)
4304 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4307 : /* No clobbers */);
4311 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4312 vabdl_high_s16 (int16x8_t a
, int16x8_t b
)
4315 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4318 : /* No clobbers */);
4322 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4323 vabdl_high_s32 (int32x4_t a
, int32x4_t b
)
4326 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4329 : /* No clobbers */);
4333 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4334 vabdl_high_u8 (uint8x16_t a
, uint8x16_t b
)
4337 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4340 : /* No clobbers */);
4344 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4345 vabdl_high_u16 (uint16x8_t a
, uint16x8_t b
)
4348 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4351 : /* No clobbers */);
4355 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4356 vabdl_high_u32 (uint32x4_t a
, uint32x4_t b
)
4359 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4362 : /* No clobbers */);
4366 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4367 vabdl_s8 (int8x8_t a
, int8x8_t b
)
4370 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4373 : /* No clobbers */);
4377 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4378 vabdl_s16 (int16x4_t a
, int16x4_t b
)
4381 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4384 : /* No clobbers */);
4388 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4389 vabdl_s32 (int32x2_t a
, int32x2_t b
)
4392 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4395 : /* No clobbers */);
4399 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4400 vabdl_u8 (uint8x8_t a
, uint8x8_t b
)
4403 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4406 : /* No clobbers */);
4410 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4411 vabdl_u16 (uint16x4_t a
, uint16x4_t b
)
4414 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4417 : /* No clobbers */);
4421 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4422 vabdl_u32 (uint32x2_t a
, uint32x2_t b
)
4425 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4428 : /* No clobbers */);
4432 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
4433 vabdq_f32 (float32x4_t a
, float32x4_t b
)
4436 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4439 : /* No clobbers */);
4443 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
4444 vabdq_f64 (float64x2_t a
, float64x2_t b
)
4447 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4450 : /* No clobbers */);
4454 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4455 vabdq_s8 (int8x16_t a
, int8x16_t b
)
4458 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4461 : /* No clobbers */);
4465 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4466 vabdq_s16 (int16x8_t a
, int16x8_t b
)
4469 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4472 : /* No clobbers */);
4476 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4477 vabdq_s32 (int32x4_t a
, int32x4_t b
)
4480 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4483 : /* No clobbers */);
4487 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
4488 vabdq_u8 (uint8x16_t a
, uint8x16_t b
)
4491 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4494 : /* No clobbers */);
4498 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4499 vabdq_u16 (uint16x8_t a
, uint16x8_t b
)
4502 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4505 : /* No clobbers */);
4509 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4510 vabdq_u32 (uint32x4_t a
, uint32x4_t b
)
4513 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4516 : /* No clobbers */);
4520 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
4521 vabds_f32 (float32_t a
, float32_t b
)
4524 __asm__ ("fabd %s0, %s1, %s2"
4527 : /* No clobbers */);
4531 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4532 vaddlv_s8 (int8x8_t a
)
4535 __asm__ ("saddlv %h0,%1.8b"
4538 : /* No clobbers */);
4542 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
4543 vaddlv_s16 (int16x4_t a
)
4546 __asm__ ("saddlv %s0,%1.4h"
4549 : /* No clobbers */);
4553 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4554 vaddlv_u8 (uint8x8_t a
)
4557 __asm__ ("uaddlv %h0,%1.8b"
4560 : /* No clobbers */);
4564 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
4565 vaddlv_u16 (uint16x4_t a
)
4568 __asm__ ("uaddlv %s0,%1.4h"
4571 : /* No clobbers */);
4575 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4576 vaddlvq_s8 (int8x16_t a
)
4579 __asm__ ("saddlv %h0,%1.16b"
4582 : /* No clobbers */);
4586 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
4587 vaddlvq_s16 (int16x8_t a
)
4590 __asm__ ("saddlv %s0,%1.8h"
4593 : /* No clobbers */);
4597 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
4598 vaddlvq_s32 (int32x4_t a
)
4601 __asm__ ("saddlv %d0,%1.4s"
4604 : /* No clobbers */);
4608 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4609 vaddlvq_u8 (uint8x16_t a
)
4612 __asm__ ("uaddlv %h0,%1.16b"
4615 : /* No clobbers */);
4619 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
4620 vaddlvq_u16 (uint16x8_t a
)
4623 __asm__ ("uaddlv %s0,%1.8h"
4626 : /* No clobbers */);
4630 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
4631 vaddlvq_u32 (uint32x4_t a
)
4634 __asm__ ("uaddlv %d0,%1.4s"
4637 : /* No clobbers */);
4641 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
4642 vbsl_f32 (uint32x2_t a
, float32x2_t b
, float32x2_t c
)
4645 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4647 : "0"(a
), "w"(b
), "w"(c
)
4648 : /* No clobbers */);
4652 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
4653 vbsl_p8 (uint8x8_t a
, poly8x8_t b
, poly8x8_t c
)
4656 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4658 : "0"(a
), "w"(b
), "w"(c
)
4659 : /* No clobbers */);
4663 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
4664 vbsl_p16 (uint16x4_t a
, poly16x4_t b
, poly16x4_t c
)
4667 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4669 : "0"(a
), "w"(b
), "w"(c
)
4670 : /* No clobbers */);
4674 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4675 vbsl_s8 (uint8x8_t a
, int8x8_t b
, int8x8_t c
)
4678 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4680 : "0"(a
), "w"(b
), "w"(c
)
4681 : /* No clobbers */);
4685 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4686 vbsl_s16 (uint16x4_t a
, int16x4_t b
, int16x4_t c
)
4689 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4691 : "0"(a
), "w"(b
), "w"(c
)
4692 : /* No clobbers */);
4696 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4697 vbsl_s32 (uint32x2_t a
, int32x2_t b
, int32x2_t c
)
4700 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4702 : "0"(a
), "w"(b
), "w"(c
)
4703 : /* No clobbers */);
4707 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
4708 vbsl_s64 (uint64x1_t a
, int64x1_t b
, int64x1_t c
)
4711 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4713 : "0"(a
), "w"(b
), "w"(c
)
4714 : /* No clobbers */);
4718 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
4719 vbsl_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
4722 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4724 : "0"(a
), "w"(b
), "w"(c
)
4725 : /* No clobbers */);
4729 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
4730 vbsl_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
4733 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4735 : "0"(a
), "w"(b
), "w"(c
)
4736 : /* No clobbers */);
4740 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
4741 vbsl_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
4744 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4746 : "0"(a
), "w"(b
), "w"(c
)
4747 : /* No clobbers */);
4751 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
4752 vbsl_u64 (uint64x1_t a
, uint64x1_t b
, uint64x1_t c
)
4755 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4757 : "0"(a
), "w"(b
), "w"(c
)
4758 : /* No clobbers */);
4762 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
4763 vbslq_f32 (uint32x4_t a
, float32x4_t b
, float32x4_t c
)
4766 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4768 : "0"(a
), "w"(b
), "w"(c
)
4769 : /* No clobbers */);
4773 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
4774 vbslq_f64 (uint64x2_t a
, float64x2_t b
, float64x2_t c
)
4777 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4779 : "0"(a
), "w"(b
), "w"(c
)
4780 : /* No clobbers */);
4784 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
4785 vbslq_p8 (uint8x16_t a
, poly8x16_t b
, poly8x16_t c
)
4788 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4790 : "0"(a
), "w"(b
), "w"(c
)
4791 : /* No clobbers */);
4795 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
4796 vbslq_p16 (uint16x8_t a
, poly16x8_t b
, poly16x8_t c
)
4799 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4801 : "0"(a
), "w"(b
), "w"(c
)
4802 : /* No clobbers */);
4806 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4807 vbslq_s8 (uint8x16_t a
, int8x16_t b
, int8x16_t c
)
4810 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4812 : "0"(a
), "w"(b
), "w"(c
)
4813 : /* No clobbers */);
4817 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4818 vbslq_s16 (uint16x8_t a
, int16x8_t b
, int16x8_t c
)
4821 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4823 : "0"(a
), "w"(b
), "w"(c
)
4824 : /* No clobbers */);
4828 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4829 vbslq_s32 (uint32x4_t a
, int32x4_t b
, int32x4_t c
)
4832 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4834 : "0"(a
), "w"(b
), "w"(c
)
4835 : /* No clobbers */);
4839 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4840 vbslq_s64 (uint64x2_t a
, int64x2_t b
, int64x2_t c
)
4843 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4845 : "0"(a
), "w"(b
), "w"(c
)
4846 : /* No clobbers */);
4850 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
4851 vbslq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
4854 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4856 : "0"(a
), "w"(b
), "w"(c
)
4857 : /* No clobbers */);
4861 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4862 vbslq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
4865 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4867 : "0"(a
), "w"(b
), "w"(c
)
4868 : /* No clobbers */);
4872 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4873 vbslq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
4876 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4878 : "0"(a
), "w"(b
), "w"(c
)
4879 : /* No clobbers */);
4883 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4884 vbslq_u64 (uint64x2_t a
, uint64x2_t b
, uint64x2_t c
)
4887 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4889 : "0"(a
), "w"(b
), "w"(c
)
4890 : /* No clobbers */);
4894 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4895 vcls_s8 (int8x8_t a
)
4898 __asm__ ("cls %0.8b,%1.8b"
4901 : /* No clobbers */);
4905 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4906 vcls_s16 (int16x4_t a
)
4909 __asm__ ("cls %0.4h,%1.4h"
4912 : /* No clobbers */);
4916 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4917 vcls_s32 (int32x2_t a
)
4920 __asm__ ("cls %0.2s,%1.2s"
4923 : /* No clobbers */);
4927 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4928 vclsq_s8 (int8x16_t a
)
4931 __asm__ ("cls %0.16b,%1.16b"
4934 : /* No clobbers */);
4938 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4939 vclsq_s16 (int16x8_t a
)
4942 __asm__ ("cls %0.8h,%1.8h"
4945 : /* No clobbers */);
4949 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4950 vclsq_s32 (int32x4_t a
)
4953 __asm__ ("cls %0.4s,%1.4s"
4956 : /* No clobbers */);
4960 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4961 vclz_s8 (int8x8_t a
)
4964 __asm__ ("clz %0.8b,%1.8b"
4967 : /* No clobbers */);
4971 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4972 vclz_s16 (int16x4_t a
)
4975 __asm__ ("clz %0.4h,%1.4h"
4978 : /* No clobbers */);
4982 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4983 vclz_s32 (int32x2_t a
)
4986 __asm__ ("clz %0.2s,%1.2s"
4989 : /* No clobbers */);
4993 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
4994 vclz_u8 (uint8x8_t a
)
4997 __asm__ ("clz %0.8b,%1.8b"
5000 : /* No clobbers */);
5004 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
5005 vclz_u16 (uint16x4_t a
)
5008 __asm__ ("clz %0.4h,%1.4h"
5011 : /* No clobbers */);
5015 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5016 vclz_u32 (uint32x2_t a
)
5019 __asm__ ("clz %0.2s,%1.2s"
5022 : /* No clobbers */);
5026 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
5027 vclzq_s8 (int8x16_t a
)
5030 __asm__ ("clz %0.16b,%1.16b"
5033 : /* No clobbers */);
5037 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
5038 vclzq_s16 (int16x8_t a
)
5041 __asm__ ("clz %0.8h,%1.8h"
5044 : /* No clobbers */);
5048 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
5049 vclzq_s32 (int32x4_t a
)
5052 __asm__ ("clz %0.4s,%1.4s"
5055 : /* No clobbers */);
5059 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
5060 vclzq_u8 (uint8x16_t a
)
5063 __asm__ ("clz %0.16b,%1.16b"
5066 : /* No clobbers */);
5070 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
5071 vclzq_u16 (uint16x8_t a
)
5074 __asm__ ("clz %0.8h,%1.8h"
5077 : /* No clobbers */);
5081 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5082 vclzq_u32 (uint32x4_t a
)
5085 __asm__ ("clz %0.4s,%1.4s"
5088 : /* No clobbers */);
5092 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
5093 vcnt_p8 (poly8x8_t a
)
5096 __asm__ ("cnt %0.8b,%1.8b"
5099 : /* No clobbers */);
5103 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
5104 vcnt_s8 (int8x8_t a
)
5107 __asm__ ("cnt %0.8b,%1.8b"
5110 : /* No clobbers */);
5114 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
5115 vcnt_u8 (uint8x8_t a
)
5118 __asm__ ("cnt %0.8b,%1.8b"
5121 : /* No clobbers */);
5125 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
5126 vcntq_p8 (poly8x16_t a
)
5129 __asm__ ("cnt %0.16b,%1.16b"
5132 : /* No clobbers */);
5136 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
5137 vcntq_s8 (int8x16_t a
)
5140 __asm__ ("cnt %0.16b,%1.16b"
5143 : /* No clobbers */);
5147 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
5148 vcntq_u8 (uint8x16_t a
)
5151 __asm__ ("cnt %0.16b,%1.16b"
5154 : /* No clobbers */);
/* INS: copy word lane D of C into word lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy doubleword lane D of C into doubleword lane B of A.  A is tied
   to the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy byte lane D of C into byte lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy halfword lane D of C into halfword lane B of A.  A is tied to
   the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy byte lane D of C into byte lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy halfword lane D of C into halfword lane B of A.  A is tied to
   the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy word lane D of C into word lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy doubleword lane D of C into doubleword lane B of A.  A is tied
   to the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy byte lane D of C into byte lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy halfword lane D of C into halfword lane B of A.  A is tied to
   the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy word lane D of C into word lane B of A.  A is tied to the
   destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy doubleword lane D of C into doubleword lane B of A.  A is tied
   to the destination; B and D must be compile-time constants ("i").  */
#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vcvt_f16_f32 not supported */

/* vcvt_f32_f16 not supported */

/* vcvt_high_f16_f32 not supported */

/* vcvt_high_f32_f16 not supported */
5322 static float32x2_t
vdup_n_f32 (float32_t
);
/* SCVTF with immediate #B on the two word lanes of A; B must be a
   compile-time constant.  */
#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* UCVTF with immediate #B on the two word lanes of A; B must be a
   compile-time constant.  */
#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZS with immediate #B on the two word lanes of A; B must be a
   compile-time constant.  */
#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZU with immediate #B on the two word lanes of A; B must be a
   compile-time constant.  */
#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar SCVTF on D registers with immediate B (compile-time constant).
   The result is declared with the floating-point type the name implies.  */
#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar UCVTF on D registers with immediate B (compile-time constant).
   The result is declared with the floating-point type the name implies.  */
#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FCVTZS on D registers with immediate B (compile-time constant).
   The result is declared with the integer type the name implies.  */
#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FCVTZU on D registers with immediate B (compile-time constant).
   The result is declared with the integer type the name implies.  */
#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* SCVTF with immediate #B on the four word lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* UCVTF with immediate #B on the four word lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* SCVTF with immediate #B on the two doubleword lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* UCVTF with immediate #B on the two doubleword lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZS with immediate #B on the four word lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZS with immediate #B on the two doubleword lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZU with immediate #B on the four word lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* FCVTZU with immediate #B on the two doubleword lanes of A; B must be a
   compile-time constant.  */
#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar SCVTF on S registers with immediate B (compile-time constant).
   The result is declared with the floating-point type the name implies.  */
#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar UCVTF on S registers with immediate B (compile-time constant).
   The result is declared with the floating-point type the name implies.  */
#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FCVTZS on S registers with immediate B (compile-time constant).
   The result is declared with the integer type the name implies.  */
#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FCVTZU on S registers with immediate B (compile-time constant).
   The result is declared with the integer type the name implies.  */
#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5564 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
5565 vcvtx_f32_f64 (float64x2_t a
)
5568 __asm__ ("fcvtxn %0.2s,%1.2d"
5571 : /* No clobbers */);
5575 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
5576 vcvtx_high_f32_f64 (float64x2_t a
)
5579 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5582 : /* No clobbers */);
5586 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
5587 vcvtxd_f32_f64 (float64_t a
)
5590 __asm__ ("fcvtxn %s0,%d1"
5593 : /* No clobbers */);
/* DUP: broadcast word lane B of A to both word lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of A to all eight byte lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of A to all four halfword lanes of the
   result.  B must be a compile-time constant.  */
#define vdup_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of A to all eight byte lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of A to all four halfword lanes of the
   result.  B must be a compile-time constant.  */
#define vdup_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast word lane B of A to both word lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy doubleword lane B of A into doubleword 0 of the result.
   B must be a compile-time constant.  */
#define vdup_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of A to all eight byte lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of A to all four halfword lanes of the
   result.  B must be a compile-time constant.  */
#define vdup_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast word lane B of A to both word lanes of the result.
   B must be a compile-time constant.  */
#define vdup_lane_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* INS: copy doubleword lane B of A into doubleword 0 of the result.
   B must be a compile-time constant.  */
#define vdup_lane_u64(a, b) \
  __extension__ \
    ({ \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5729 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
5730 vdup_n_f32 (float32_t a
)
5733 __asm__ ("dup %0.2s, %w1"
5736 : /* No clobbers */);
5740 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
5741 vdup_n_p8 (uint32_t a
)
5744 __asm__ ("dup %0.8b,%w1"
5747 : /* No clobbers */);
5751 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
5752 vdup_n_p16 (uint32_t a
)
5755 __asm__ ("dup %0.4h,%w1"
5758 : /* No clobbers */);
5762 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
5763 vdup_n_s8 (int32_t a
)
5766 __asm__ ("dup %0.8b,%w1"
5769 : /* No clobbers */);
5773 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
5774 vdup_n_s16 (int32_t a
)
5777 __asm__ ("dup %0.4h,%w1"
5780 : /* No clobbers */);
5784 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
5785 vdup_n_s32 (int32_t a
)
5788 __asm__ ("dup %0.2s,%w1"
5791 : /* No clobbers */);
5795 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
5796 vdup_n_s64 (int64_t a
)
5799 __asm__ ("ins %0.d[0],%x1"
5802 : /* No clobbers */);
5806 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
5807 vdup_n_u8 (uint32_t a
)
5810 __asm__ ("dup %0.8b,%w1"
5813 : /* No clobbers */);
5817 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
5818 vdup_n_u16 (uint32_t a
)
5821 __asm__ ("dup %0.4h,%w1"
5824 : /* No clobbers */);
5828 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5829 vdup_n_u32 (uint32_t a
)
5832 __asm__ ("dup %0.2s,%w1"
5835 : /* No clobbers */);
5839 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5840 vdup_n_u64 (uint64_t a
)
5843 __asm__ ("ins %0.d[0],%x1"
5846 : /* No clobbers */);
/* Scalar DUP: extract doubleword lane B of A into a D register.
   B must be a compile-time constant.  */
#define vdupd_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       float64_t result; \
       __asm__ ("dup %d0, %1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast word lane B of the 64-bit vector A to all four word
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast doubleword lane B of A to both doubleword lanes of the
   result.  B must be a compile-time constant.  */
#define vdupq_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x1_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of the 64-bit vector A to all sixteen byte
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of the 64-bit vector A to all eight
   halfword lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of the 64-bit vector A to all sixteen byte
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of the 64-bit vector A to all eight
   halfword lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast word lane B of the 64-bit vector A to all four word
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast doubleword lane B of A to both doubleword lanes of the
   result.  B must be a compile-time constant.  */
#define vdupq_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast byte lane B of the 64-bit vector A to all sixteen byte
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast halfword lane B of the 64-bit vector A to all eight
   halfword lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast word lane B of the 64-bit vector A to all four word
   lanes of the result.  B must be a compile-time constant.  */
#define vdupq_lane_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* DUP: broadcast doubleword lane B of A to both doubleword lanes of the
   result.  B must be a compile-time constant.  */
#define vdupq_lane_u64(a, b) \
  __extension__ \
    ({ \
       uint64x1_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
6006 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6007 vdupq_n_f32 (float32_t a
)
6010 __asm__ ("dup %0.4s, %w1"
6013 : /* No clobbers */);
6017 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6018 vdupq_n_f64 (float64_t a
)
6021 __asm__ ("dup %0.2d, %x1"
6024 : /* No clobbers */);
6028 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
6029 vdupq_n_p8 (uint32_t a
)
6032 __asm__ ("dup %0.16b,%w1"
6035 : /* No clobbers */);
6039 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
6040 vdupq_n_p16 (uint32_t a
)
6043 __asm__ ("dup %0.8h,%w1"
6046 : /* No clobbers */);
6050 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
6051 vdupq_n_s8 (int32_t a
)
6054 __asm__ ("dup %0.16b,%w1"
6057 : /* No clobbers */);
6061 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
6062 vdupq_n_s16 (int32_t a
)
6065 __asm__ ("dup %0.8h,%w1"
6068 : /* No clobbers */);
6072 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6073 vdupq_n_s32 (int32_t a
)
6076 __asm__ ("dup %0.4s,%w1"
6079 : /* No clobbers */);
6083 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6084 vdupq_n_s64 (int64_t a
)
6087 __asm__ ("dup %0.2d,%x1"
6090 : /* No clobbers */);
6094 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
6095 vdupq_n_u8 (uint32_t a
)
6098 __asm__ ("dup %0.16b,%w1"
6101 : /* No clobbers */);
6105 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
6106 vdupq_n_u16 (uint32_t a
)
6109 __asm__ ("dup %0.8h,%w1"
6112 : /* No clobbers */);
6116 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6117 vdupq_n_u32 (uint32_t a
)
6120 __asm__ ("dup %0.4s,%w1"
6123 : /* No clobbers */);
6127 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6128 vdupq_n_u64 (uint64_t a
)
6131 __asm__ ("dup %0.2d,%x1"
6134 : /* No clobbers */);
/* Scalar DUP: extract word lane B of A into an S register.
   B must be a compile-time constant.  */
#define vdups_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       float32_t result; \
       __asm__ ("dup %s0, %1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x1_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 8-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vextq_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C (compile-time constant) is used
   directly as the byte offset.  */
#define vextq_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 2 to form the byte offset.  */
#define vextq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 4 to form the byte offset.  */
#define vextq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* EXT on 16-byte vectors A and B.  C is an element index (compile-time
   constant) scaled by 8 to form the byte offset.  */
#define vextq_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
6462 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6463 vfma_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
6466 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
6468 : "0"(a
), "w"(b
), "w"(c
)
6469 : /* No clobbers */);
/* FMLA by element on two word lanes.  The accumulator A is tied to the
   destination; B is multiplied by word lane D of C.  D must be a
   compile-time constant.  */
#define vfma_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FMLA by element: operand 1 is A and operand 2 is doubleword lane
   C of B.  C must be a compile-time constant.
   NOTE(review): no input is tied to the destination (operand 0), so the
   value FMLA accumulates into is not supplied by this macro -- confirm
   the intended semantics.  */
#define vfmad_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64_t result; \
       __asm__ ("fmla %d0,%d1,%2.d[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
6500 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6501 vfmaq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
6504 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
6506 : "0"(a
), "w"(b
), "w"(c
)
6507 : /* No clobbers */);
6511 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6512 vfmaq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
6515 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6517 : "0"(a
), "w"(b
), "w"(c
)
6518 : /* No clobbers */);
/* FMLA by element on four word lanes.  The accumulator A is tied to the
   destination; B is multiplied by word lane D of C.  D must be a
   compile-time constant.  */
#define vfmaq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* FMLA by element on two doubleword lanes.  The accumulator A is tied to
   the destination; B is multiplied by doubleword lane D of C.  D must be a
   compile-time constant.  */
#define vfmaq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Scalar FMLA by element: operand 1 is A and operand 2 is word lane C of
   B.  C must be a compile-time constant.
   NOTE(review): no input is tied to the destination (operand 0), so the
   value FMLA accumulates into is not supplied by this macro -- confirm
   the intended semantics.  */
#define vfmas_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32_t result; \
       __asm__ ("fmla %s0,%s1,%2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
6563 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6564 vfma_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
6567 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6569 : "0"(a
), "w"(b
), "w"(c
)
6570 : /* No clobbers */);
6574 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6575 vfmaq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
6578 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6580 : "0"(a
), "w"(b
), "w"(c
)
6581 : /* No clobbers */);
6585 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6586 vfmaq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
6589 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6591 : "0"(a
), "w"(b
), "w"(c
)
6592 : /* No clobbers */);
6596 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6597 vfms_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
6600 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6602 : "0"(a
), "w"(b
), "w"(c
)
6603 : /* No clobbers */);
/* Scalar FMLS by element: operand 1 is A and operand 2 is doubleword lane
   C of B.  C must be a compile-time constant.
   NOTE(review): no input is tied to the destination (operand 0), so the
   value FMLS accumulates into is not supplied by this macro -- confirm
   the intended semantics.  */
#define vfmsd_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64_t result; \
       __asm__ ("fmls %d0,%d1,%2.d[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
6620 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6621 vfmsq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
6624 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6626 : "0"(a
), "w"(b
), "w"(c
)
6627 : /* No clobbers */);
6631 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6632 vfmsq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
6635 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6637 : "0"(a
), "w"(b
), "w"(c
)
6638 : /* No clobbers */);
/* Scalar FMLS by element: operand 1 is A and operand 2 is word lane C of
   B.  C must be a compile-time constant.
   NOTE(review): no input is tied to the destination (operand 0), so the
   value FMLS accumulates into is not supplied by this macro -- confirm
   the intended semantics.  */
#define vfmss_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32_t result; \
       __asm__ ("fmls %s0,%s1,%2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
6655 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6656 vget_high_f32 (float32x4_t a
)
6659 __asm__ ("ins %0.d[0], %1.d[1]"
6662 : /* No clobbers */);
6666 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
6667 vget_high_f64 (float64x2_t a
)
6670 __asm__ ("ins %0.d[0], %1.d[1]"
6673 : /* No clobbers */);
6677 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
6678 vget_high_p8 (poly8x16_t a
)
6681 __asm__ ("ins %0.d[0], %1.d[1]"
6684 : /* No clobbers */);
6688 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
6689 vget_high_p16 (poly16x8_t a
)
6692 __asm__ ("ins %0.d[0], %1.d[1]"
6695 : /* No clobbers */);
6699 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
6700 vget_high_s8 (int8x16_t a
)
6703 __asm__ ("ins %0.d[0], %1.d[1]"
6706 : /* No clobbers */);
6710 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
6711 vget_high_s16 (int16x8_t a
)
6714 __asm__ ("ins %0.d[0], %1.d[1]"
6717 : /* No clobbers */);
6721 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6722 vget_high_s32 (int32x4_t a
)
6725 __asm__ ("ins %0.d[0], %1.d[1]"
6728 : /* No clobbers */);
6732 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
6733 vget_high_s64 (int64x2_t a
)
6736 __asm__ ("ins %0.d[0], %1.d[1]"
6739 : /* No clobbers */);
6743 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
6744 vget_high_u8 (uint8x16_t a
)
6747 __asm__ ("ins %0.d[0], %1.d[1]"
6750 : /* No clobbers */);
6754 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
6755 vget_high_u16 (uint16x8_t a
)
6758 __asm__ ("ins %0.d[0], %1.d[1]"
6761 : /* No clobbers */);
6765 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6766 vget_high_u32 (uint32x4_t a
)
6769 __asm__ ("ins %0.d[0], %1.d[1]"
6772 : /* No clobbers */);
6776 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
6777 vget_high_u64 (uint64x2_t a
)
6780 __asm__ ("ins %0.d[0], %1.d[1]"
6783 : /* No clobbers */);
6787 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6788 vget_low_f32 (float32x4_t a
)
6791 __asm__ ("ins %0.d[0], %1.d[0]"
6794 : /* No clobbers */);
6798 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
6799 vget_low_f64 (float64x2_t a
)
6802 __asm__ ("ins %0.d[0], %1.d[0]"
6805 : /* No clobbers */);
6809 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
6810 vget_low_p8 (poly8x16_t a
)
6813 __asm__ ("ins %0.d[0], %1.d[0]"
6816 : /* No clobbers */);
6820 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
6821 vget_low_p16 (poly16x8_t a
)
6824 __asm__ ("ins %0.d[0], %1.d[0]"
6827 : /* No clobbers */);
6831 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
6832 vget_low_s8 (int8x16_t a
)
6835 __asm__ ("ins %0.d[0], %1.d[0]"
6838 : /* No clobbers */);
6842 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
6843 vget_low_s16 (int16x8_t a
)
6846 __asm__ ("ins %0.d[0], %1.d[0]"
6849 : /* No clobbers */);
6853 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6854 vget_low_s32 (int32x4_t a
)
6857 __asm__ ("ins %0.d[0], %1.d[0]"
6860 : /* No clobbers */);
6864 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
6865 vget_low_s64 (int64x2_t a
)
6868 __asm__ ("ins %0.d[0], %1.d[0]"
6871 : /* No clobbers */);
6875 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
6876 vget_low_u8 (uint8x16_t a
)
6879 __asm__ ("ins %0.d[0], %1.d[0]"
6882 : /* No clobbers */);
6886 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
6887 vget_low_u16 (uint16x8_t a
)
6890 __asm__ ("ins %0.d[0], %1.d[0]"
6893 : /* No clobbers */);
6897 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6898 vget_low_u32 (uint32x4_t a
)
6901 __asm__ ("ins %0.d[0], %1.d[0]"
6904 : /* No clobbers */);
6908 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
6909 vget_low_u64 (uint64x2_t a
)
6912 __asm__ ("ins %0.d[0], %1.d[0]"
6915 : /* No clobbers */);
6919 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
6920 vhsub_s8 (int8x8_t a
, int8x8_t b
)
6923 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6926 : /* No clobbers */);
6930 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
6931 vhsub_s16 (int16x4_t a
, int16x4_t b
)
6934 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6937 : /* No clobbers */);
6941 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6942 vhsub_s32 (int32x2_t a
, int32x2_t b
)
6945 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6948 : /* No clobbers */);
6952 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
6953 vhsub_u8 (uint8x8_t a
, uint8x8_t b
)
6956 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6959 : /* No clobbers */);
6963 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
6964 vhsub_u16 (uint16x4_t a
, uint16x4_t b
)
6967 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6970 : /* No clobbers */);
6974 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6975 vhsub_u32 (uint32x2_t a
, uint32x2_t b
)
6978 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6981 : /* No clobbers */);
6985 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
6986 vhsubq_s8 (int8x16_t a
, int8x16_t b
)
6989 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6992 : /* No clobbers */);
6996 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
6997 vhsubq_s16 (int16x8_t a
, int16x8_t b
)
7000 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
7003 : /* No clobbers */);
7007 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
7008 vhsubq_s32 (int32x4_t a
, int32x4_t b
)
7011 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
7014 : /* No clobbers */);
7018 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
7019 vhsubq_u8 (uint8x16_t a
, uint8x16_t b
)
7022 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
7025 : /* No clobbers */);
7029 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
7030 vhsubq_u16 (uint16x8_t a
, uint16x8_t b
)
7033 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
7036 : /* No clobbers */);
7040 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
7041 vhsubq_u32 (uint32x4_t a
, uint32x4_t b
)
7044 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
7047 : /* No clobbers */);
7051 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7052 vld1_dup_f32 (const float32_t
* a
)
7055 __asm__ ("ld1r {%0.2s}, %1"
7058 : /* No clobbers */);
7062 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
7063 vld1_dup_f64 (const float64_t
* a
)
7066 __asm__ ("ld1r {%0.1d}, %1"
7069 : /* No clobbers */);
7073 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
7074 vld1_dup_p8 (const poly8_t
* a
)
7077 __asm__ ("ld1r {%0.8b}, %1"
7080 : /* No clobbers */);
7084 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
7085 vld1_dup_p16 (const poly16_t
* a
)
7088 __asm__ ("ld1r {%0.4h}, %1"
7091 : /* No clobbers */);
7095 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
7096 vld1_dup_s8 (const int8_t * a
)
7099 __asm__ ("ld1r {%0.8b}, %1"
7102 : /* No clobbers */);
7106 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
7107 vld1_dup_s16 (const int16_t * a
)
7110 __asm__ ("ld1r {%0.4h}, %1"
7113 : /* No clobbers */);
7117 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
7118 vld1_dup_s32 (const int32_t * a
)
7121 __asm__ ("ld1r {%0.2s}, %1"
7124 : /* No clobbers */);
7128 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
7129 vld1_dup_s64 (const int64_t * a
)
7132 __asm__ ("ld1r {%0.1d}, %1"
7135 : /* No clobbers */);
7139 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
7140 vld1_dup_u8 (const uint8_t * a
)
7143 __asm__ ("ld1r {%0.8b}, %1"
7146 : /* No clobbers */);
7150 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
7151 vld1_dup_u16 (const uint16_t * a
)
7154 __asm__ ("ld1r {%0.4h}, %1"
7157 : /* No clobbers */);
7161 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
7162 vld1_dup_u32 (const uint32_t * a
)
7165 __asm__ ("ld1r {%0.2s}, %1"
7168 : /* No clobbers */);
7172 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
7173 vld1_dup_u64 (const uint64_t * a
)
7176 __asm__ ("ld1r {%0.1d}, %1"
7179 : /* No clobbers */);
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       const float32_t * a_ = (a); \
       float32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       const float64_t * a_ = (a); \
       float64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       const poly8_t * a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       const poly16_t * a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       const int8_t * a_ = (a); \
       int8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       const int16_t * a_ = (a); \
       int16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       const int32_t * a_ = (a); \
       int32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       const int64_t * a_ = (a); \
       int64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       const uint8_t * a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       const uint16_t * a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       const uint32_t * a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       const uint64_t * a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i" (c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
7339 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
7340 vld1q_dup_f32 (const float32_t
* a
)
7343 __asm__ ("ld1r {%0.4s}, %1"
7346 : /* No clobbers */);
7350 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
7351 vld1q_dup_f64 (const float64_t
* a
)
7354 __asm__ ("ld1r {%0.2d}, %1"
7357 : /* No clobbers */);
7361 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
7362 vld1q_dup_p8 (const poly8_t
* a
)
7365 __asm__ ("ld1r {%0.16b}, %1"
7368 : /* No clobbers */);
7372 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
7373 vld1q_dup_p16 (const poly16_t
* a
)
7376 __asm__ ("ld1r {%0.8h}, %1"
7379 : /* No clobbers */);
7383 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
7384 vld1q_dup_s8 (const int8_t * a
)
7387 __asm__ ("ld1r {%0.16b}, %1"
7390 : /* No clobbers */);
7394 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
7395 vld1q_dup_s16 (const int16_t * a
)
7398 __asm__ ("ld1r {%0.8h}, %1"
7401 : /* No clobbers */);
7405 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
7406 vld1q_dup_s32 (const int32_t * a
)
7409 __asm__ ("ld1r {%0.4s}, %1"
7412 : /* No clobbers */);
7416 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
7417 vld1q_dup_s64 (const int64_t * a
)
7420 __asm__ ("ld1r {%0.2d}, %1"
7423 : /* No clobbers */);
7427 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
7428 vld1q_dup_u8 (const uint8_t * a
)
7431 __asm__ ("ld1r {%0.16b}, %1"
7434 : /* No clobbers */);
7438 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
7439 vld1q_dup_u16 (const uint16_t * a
)
7442 __asm__ ("ld1r {%0.8h}, %1"
7445 : /* No clobbers */);
7449 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
7450 vld1q_dup_u32 (const uint32_t * a
)
7453 __asm__ ("ld1r {%0.4s}, %1"
7456 : /* No clobbers */);
7460 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
7461 vld1q_dup_u64 (const uint64_t * a
)
7464 __asm__ ("ld1r {%0.2d}, %1"
7467 : /* No clobbers */);
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       const float32_t * a_ = (a); \
       float32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       const float64_t * a_ = (a); \
       float64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       const poly8_t * a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       const poly16_t * a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       const int8_t * a_ = (a); \
       int8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       const int16_t * a_ = (a); \
       int16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       const int32_t * a_ = (a); \
       int32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       const int64_t * a_ = (a); \
       int64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       const uint8_t * a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       const uint16_t * a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       const uint32_t * a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return B with lane C replaced by the element loaded from A.  */
#define vld1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       const uint64_t * a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane, done as FMUL into the scratch T1
   followed by FADD.  */
#define vmla_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       float32x2_t t1; \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmla_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  */
#define vmla_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmla_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  */
#define vmla_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane, C being a 128-bit vector (MLA by
   element).  C uses the "x" constraint: the .h[lane] operand can only
   name V0-V15.  */
#define vmla_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane, C being a 128-bit vector (MLA by
   element).  */
#define vmla_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane, C being a 128-bit vector (MLA by
   element).  C uses the "x" constraint: the .h[lane] operand can only
   name V0-V15.  */
#define vmla_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane, C being a 128-bit vector (MLA by
   element).  */
#define vmla_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7754 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7755 vmla_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
7759 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
7760 : "=w"(result
), "=w"(t1
)
7761 : "0"(a
), "w"(b
), "w"(c
)
7762 : /* No clobbers */);
7766 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
7767 vmla_n_s16 (int16x4_t a
, int16x4_t b
, int16_t c
)
7770 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7772 : "0"(a
), "w"(b
), "w"(c
)
7773 : /* No clobbers */);
7777 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
7778 vmla_n_s32 (int32x2_t a
, int32x2_t b
, int32_t c
)
7781 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7783 : "0"(a
), "w"(b
), "w"(c
)
7784 : /* No clobbers */);
7788 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
7789 vmla_n_u16 (uint16x4_t a
, uint16x4_t b
, uint16_t c
)
7792 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7794 : "0"(a
), "w"(b
), "w"(c
)
7795 : /* No clobbers */);
7799 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
7800 vmla_n_u32 (uint32x2_t a
, uint32x2_t b
, uint32_t c
)
7803 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7805 : "0"(a
), "w"(b
), "w"(c
)
7806 : /* No clobbers */);
7810 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
7811 vmla_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
7814 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7816 : "0"(a
), "w"(b
), "w"(c
)
7817 : /* No clobbers */);
7821 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
7822 vmla_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
7825 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7827 : "0"(a
), "w"(b
), "w"(c
)
7828 : /* No clobbers */);
7832 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
7833 vmla_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
7836 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7838 : "0"(a
), "w"(b
), "w"(c
)
7839 : /* No clobbers */);
7843 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
7844 vmla_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
7847 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7849 : "0"(a
), "w"(b
), "w"(c
)
7850 : /* No clobbers */);
7854 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
7855 vmla_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
7858 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7860 : "0"(a
), "w"(b
), "w"(c
)
7861 : /* No clobbers */);
7865 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
7866 vmla_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
7869 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7871 : "0"(a
), "w"(b
), "w"(c
)
7872 : /* No clobbers */);
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (SMLAL2 by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlal_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (SMLAL2 by element).
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlal_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (UMLAL2 by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlal_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (UMLAL2 by element).
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlal_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (SMLAL2 by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.  */
#define vmlal_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (SMLAL2 by element).  */
#define vmlal_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (UMLAL2 by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.  */
#define vmlal_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of the high half of B by C[D]
   (UMLAL2 by element).  */
#define vmlal_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7988 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
7989 vmlal_high_n_s16 (int32x4_t a
, int16x8_t b
, int16_t c
)
7992 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
7994 : "0"(a
), "w"(b
), "w"(c
)
7995 : /* No clobbers */);
7999 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8000 vmlal_high_n_s32 (int64x2_t a
, int32x4_t b
, int32_t c
)
8003 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
8005 : "0"(a
), "w"(b
), "w"(c
)
8006 : /* No clobbers */);
8010 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8011 vmlal_high_n_u16 (uint32x4_t a
, uint16x8_t b
, uint16_t c
)
8014 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
8016 : "0"(a
), "w"(b
), "w"(c
)
8017 : /* No clobbers */);
8021 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8022 vmlal_high_n_u32 (uint64x2_t a
, uint32x4_t b
, uint32_t c
)
8025 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
8027 : "0"(a
), "w"(b
), "w"(c
)
8028 : /* No clobbers */);
8032 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8033 vmlal_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
8036 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
8038 : "0"(a
), "w"(b
), "w"(c
)
8039 : /* No clobbers */);
8043 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8044 vmlal_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
8047 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
8049 : "0"(a
), "w"(b
), "w"(c
)
8050 : /* No clobbers */);
8054 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8055 vmlal_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
8058 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
8060 : "0"(a
), "w"(b
), "w"(c
)
8061 : /* No clobbers */);
8065 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8066 vmlal_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
8069 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
8071 : "0"(a
), "w"(b
), "w"(c
)
8072 : /* No clobbers */);
8076 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8077 vmlal_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
8080 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
8082 : "0"(a
), "w"(b
), "w"(c
)
8083 : /* No clobbers */);
8087 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8088 vmlal_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
8091 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
8093 : "0"(a
), "w"(b
), "w"(c
)
8094 : /* No clobbers */);
/* Widening multiply-accumulate into A of B by C[D] (SMLAL by
   element).  C uses the "x" constraint: the .h[lane] operand can only
   name V0-V15.  */
#define vmlal_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D] (SMLAL by
   element).  */
#define vmlal_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D] (UMLAL by
   element).  C uses the "x" constraint: the .h[lane] operand can only
   name V0-V15.  */
#define vmlal_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D] (UMLAL by
   element).  */
#define vmlal_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D], C being a 128-bit
   vector (SMLAL by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.  */
#define vmlal_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D], C being a 128-bit
   vector (SMLAL by element).  */
#define vmlal_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D], C being a 128-bit
   vector (UMLAL by element).  C uses the "x" constraint: the .h[lane]
   operand can only name V0-V15.  */
#define vmlal_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply-accumulate into A of B by C[D], C being a 128-bit
   vector (UMLAL by element).  */
#define vmlal_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8210 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8211 vmlal_n_s16 (int32x4_t a
, int16x4_t b
, int16_t c
)
8214 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
8216 : "0"(a
), "w"(b
), "w"(c
)
8217 : /* No clobbers */);
8221 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8222 vmlal_n_s32 (int64x2_t a
, int32x2_t b
, int32_t c
)
8225 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
8227 : "0"(a
), "w"(b
), "w"(c
)
8228 : /* No clobbers */);
8232 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8233 vmlal_n_u16 (uint32x4_t a
, uint16x4_t b
, uint16_t c
)
8236 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
8238 : "0"(a
), "w"(b
), "w"(c
)
8239 : /* No clobbers */);
8243 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8244 vmlal_n_u32 (uint64x2_t a
, uint32x2_t b
, uint32_t c
)
8247 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
8249 : "0"(a
), "w"(b
), "w"(c
)
8250 : /* No clobbers */);
8254 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8255 vmlal_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
8258 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
8260 : "0"(a
), "w"(b
), "w"(c
)
8261 : /* No clobbers */);
8265 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8266 vmlal_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
8269 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
8271 : "0"(a
), "w"(b
), "w"(c
)
8272 : /* No clobbers */);
8276 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8277 vmlal_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
8280 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
8282 : "0"(a
), "w"(b
), "w"(c
)
8283 : /* No clobbers */);
8287 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8288 vmlal_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
8291 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
8293 : "0"(a
), "w"(b
), "w"(c
)
8294 : /* No clobbers */);
8298 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8299 vmlal_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
8302 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
8304 : "0"(a
), "w"(b
), "w"(c
)
8305 : /* No clobbers */);
8309 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8310 vmlal_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
8313 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
8315 : "0"(a
), "w"(b
), "w"(c
)
8316 : /* No clobbers */);
/* Return A + B * C[D] per lane, done as FMUL into the scratch T1
   followed by FADD.
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlaq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       float32x4_t t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlaq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlaq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlaq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).
   NOTE(review): ACLE documents C as a 64-bit vector for the non-q
   lane form; it is taken as 128-bit here -- confirm.  */
#define vmlaq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmlaq_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  */
#define vmlaq_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmlaq_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A + B * C[D] per lane (MLA by element).  */
#define vmlaq_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8447 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
8448 vmlaq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
8452 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
8453 : "=w"(result
), "=w"(t1
)
8454 : "0"(a
), "w"(b
), "w"(c
)
8455 : /* No clobbers */);
8459 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
8460 vmlaq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
8464 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
8465 : "=w"(result
), "=w"(t1
)
8466 : "0"(a
), "w"(b
), "w"(c
)
8467 : /* No clobbers */);
8471 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8472 vmlaq_n_s16 (int16x8_t a
, int16x8_t b
, int16_t c
)
8475 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8477 : "0"(a
), "w"(b
), "w"(c
)
8478 : /* No clobbers */);
8482 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8483 vmlaq_n_s32 (int32x4_t a
, int32x4_t b
, int32_t c
)
8486 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8488 : "0"(a
), "w"(b
), "w"(c
)
8489 : /* No clobbers */);
8493 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8494 vmlaq_n_u16 (uint16x8_t a
, uint16x8_t b
, uint16_t c
)
8497 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8499 : "0"(a
), "w"(b
), "w"(c
)
8500 : /* No clobbers */);
8504 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8505 vmlaq_n_u32 (uint32x4_t a
, uint32x4_t b
, uint32_t c
)
8508 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8510 : "0"(a
), "w"(b
), "w"(c
)
8511 : /* No clobbers */);
8515 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
8516 vmlaq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
8519 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8521 : "0"(a
), "w"(b
), "w"(c
)
8522 : /* No clobbers */);
8526 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8527 vmlaq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
8530 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8532 : "0"(a
), "w"(b
), "w"(c
)
8533 : /* No clobbers */);
8537 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8538 vmlaq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
8541 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8543 : "0"(a
), "w"(b
), "w"(c
)
8544 : /* No clobbers */);
8548 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
8549 vmlaq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
8552 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8554 : "0"(a
), "w"(b
), "w"(c
)
8555 : /* No clobbers */);
8559 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8560 vmlaq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
8563 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8565 : "0"(a
), "w"(b
), "w"(c
)
8566 : /* No clobbers */);
8570 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8571 vmlaq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
8574 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8576 : "0"(a
), "w"(b
), "w"(c
)
8577 : /* No clobbers */);
/* Return A - B * C[D] per lane, done as FMUL into the scratch T1
   followed by FSUB.  */
#define vmls_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       float32x2_t t1; \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A - B * C[D] per lane (MLS by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmls_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A - B * C[D] per lane (MLS by element).  */
#define vmls_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A - B * C[D] per lane (MLS by element).  C uses the "x"
   constraint: the .h[lane] operand can only name V0-V15.  */
#define vmls_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Return A - B * C[D] per lane (MLS by element).  */
#define vmls_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8652 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
8653 vmls_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
8657 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
8658 : "=w"(result
), "=w"(t1
)
8659 : "0"(a
), "w"(b
), "w"(c
)
8660 : /* No clobbers */);
8664 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8665 vmls_n_s16 (int16x4_t a
, int16x4_t b
, int16_t c
)
8668 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8670 : "0"(a
), "w"(b
), "w"(c
)
8671 : /* No clobbers */);
8675 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8676 vmls_n_s32 (int32x2_t a
, int32x2_t b
, int32_t c
)
8679 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8681 : "0"(a
), "w"(b
), "w"(c
)
8682 : /* No clobbers */);
8686 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8687 vmls_n_u16 (uint16x4_t a
, uint16x4_t b
, uint16_t c
)
8690 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8692 : "0"(a
), "w"(b
), "w"(c
)
8693 : /* No clobbers */);
8697 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8698 vmls_n_u32 (uint32x2_t a
, uint32x2_t b
, uint32_t c
)
8701 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8703 : "0"(a
), "w"(b
), "w"(c
)
8704 : /* No clobbers */);
8708 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
8709 vmls_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
8712 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8714 : "0"(a
), "w"(b
), "w"(c
)
8715 : /* No clobbers */);
8719 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8720 vmls_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
8723 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8725 : "0"(a
), "w"(b
), "w"(c
)
8726 : /* No clobbers */);
8730 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8731 vmls_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
8734 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8736 : "0"(a
), "w"(b
), "w"(c
)
8737 : /* No clobbers */);
8741 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8742 vmls_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
8745 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8747 : "0"(a
), "w"(b
), "w"(c
)
8748 : /* No clobbers */);
8752 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8753 vmls_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
8756 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8758 : "0"(a
), "w"(b
), "w"(c
)
8759 : /* No clobbers */);
8763 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8764 vmls_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
8767 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8769 : "0"(a
), "w"(b
), "w"(c
)
8770 : /* No clobbers */);
/* Signed widening multiply-subtract (SMLSL2): subtracts the products of
   the upper four 16-bit lanes of B with lane D of C from the 32-bit
   accumulator A.  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsl_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract (SMLSL2): subtracts the products of
   the upper two 32-bit lanes of B with lane D of C from the 64-bit
   accumulator A.  D must be an integer constant expression.  */
#define vmlsl_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract (UMLSL2): subtracts the products of
   the upper four 16-bit lanes of B with lane D of C from the 32-bit
   accumulator A.  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsl_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract (UMLSL2): subtracts the products of
   the upper two 32-bit lanes of B with lane D of C from the 64-bit
   accumulator A.  D must be an integer constant expression.  */
#define vmlsl_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract (SMLSL2) with the lane taken from a
   128-bit vector: subtracts the products of the upper four 16-bit lanes
   of B with lane D of C from the 32-bit accumulator A.  D must be an
   integer constant expression.  C uses the "x" constraint (V0-V15): with
   16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmlsl_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract (SMLSL2) with the lane taken from a
   128-bit vector: subtracts the products of the upper two 32-bit lanes of
   B with lane D of C from the 64-bit accumulator A.  D must be an integer
   constant expression.  */
#define vmlsl_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract (UMLSL2) with the lane taken from a
   128-bit vector: subtracts the products of the upper four 16-bit lanes
   of B with lane D of C from the 32-bit accumulator A.  D must be an
   integer constant expression.  C uses the "x" constraint (V0-V15): with
   16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmlsl_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract (UMLSL2) with the lane taken from a
   128-bit vector: subtracts the products of the upper two 32-bit lanes of
   B with lane D of C from the 64-bit accumulator A.  D must be an integer
   constant expression.  */
#define vmlsl_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8886 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8887 vmlsl_high_n_s16 (int32x4_t a
, int16x8_t b
, int16_t c
)
8890 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
8892 : "0"(a
), "w"(b
), "w"(c
)
8893 : /* No clobbers */);
8897 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8898 vmlsl_high_n_s32 (int64x2_t a
, int32x4_t b
, int32_t c
)
8901 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
8903 : "0"(a
), "w"(b
), "w"(c
)
8904 : /* No clobbers */);
8908 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8909 vmlsl_high_n_u16 (uint32x4_t a
, uint16x8_t b
, uint16_t c
)
8912 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
8914 : "0"(a
), "w"(b
), "w"(c
)
8915 : /* No clobbers */);
8919 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8920 vmlsl_high_n_u32 (uint64x2_t a
, uint32x4_t b
, uint32_t c
)
8923 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
8925 : "0"(a
), "w"(b
), "w"(c
)
8926 : /* No clobbers */);
8930 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8931 vmlsl_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
8934 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
8936 : "0"(a
), "w"(b
), "w"(c
)
8937 : /* No clobbers */);
8941 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8942 vmlsl_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
8945 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
8947 : "0"(a
), "w"(b
), "w"(c
)
8948 : /* No clobbers */);
8952 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8953 vmlsl_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
8956 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
8958 : "0"(a
), "w"(b
), "w"(c
)
8959 : /* No clobbers */);
8963 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8964 vmlsl_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
8967 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
8969 : "0"(a
), "w"(b
), "w"(c
)
8970 : /* No clobbers */);
8974 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8975 vmlsl_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
8978 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
8980 : "0"(a
), "w"(b
), "w"(c
)
8981 : /* No clobbers */);
8985 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8986 vmlsl_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
8989 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
8991 : "0"(a
), "w"(b
), "w"(c
)
8992 : /* No clobbers */);
/* Signed widening multiply-subtract by lane (SMLSL): subtracts the
   products of the four 16-bit lanes of B with lane D of C from the 32-bit
   accumulator A.  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsl_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract by lane (SMLSL): subtracts the
   products of the two 32-bit lanes of B with lane D of C from the 64-bit
   accumulator A.  D must be an integer constant expression.  */
#define vmlsl_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract by lane (UMLSL): subtracts the
   products of the four 16-bit lanes of B with lane D of C from the 32-bit
   accumulator A.  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsl_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract by lane (UMLSL): subtracts the
   products of the two 32-bit lanes of B with lane D of C from the 64-bit
   accumulator A.  D must be an integer constant expression.  */
#define vmlsl_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract by lane of a 128-bit vector (SMLSL):
   subtracts the products of the four 16-bit lanes of B with lane D of C
   from the 32-bit accumulator A.  D must be an integer constant
   expression.  C uses the "x" constraint (V0-V15): with 16-bit elements
   the by-element encoding cannot name V16-V31.  */
#define vmlsl_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply-subtract by lane of a 128-bit vector (SMLSL):
   subtracts the products of the two 32-bit lanes of B with lane D of C
   from the 64-bit accumulator A.  D must be an integer constant
   expression.  */
#define vmlsl_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract by lane of a 128-bit vector (UMLSL):
   subtracts the products of the four 16-bit lanes of B with lane D of C
   from the 32-bit accumulator A.  D must be an integer constant
   expression.  C uses the "x" constraint (V0-V15): with 16-bit elements
   the by-element encoding cannot name V16-V31.  */
#define vmlsl_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply-subtract by lane of a 128-bit vector (UMLSL):
   subtracts the products of the two 32-bit lanes of B with lane D of C
   from the 64-bit accumulator A.  D must be an integer constant
   expression.  */
#define vmlsl_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
9108 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9109 vmlsl_n_s16 (int32x4_t a
, int16x4_t b
, int16_t c
)
9112 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
9114 : "0"(a
), "w"(b
), "w"(c
)
9115 : /* No clobbers */);
9119 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9120 vmlsl_n_s32 (int64x2_t a
, int32x2_t b
, int32_t c
)
9123 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
9125 : "0"(a
), "w"(b
), "w"(c
)
9126 : /* No clobbers */);
9130 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9131 vmlsl_n_u16 (uint32x4_t a
, uint16x4_t b
, uint16_t c
)
9134 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
9136 : "0"(a
), "w"(b
), "w"(c
)
9137 : /* No clobbers */);
9141 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9142 vmlsl_n_u32 (uint64x2_t a
, uint32x2_t b
, uint32_t c
)
9145 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
9147 : "0"(a
), "w"(b
), "w"(c
)
9148 : /* No clobbers */);
9152 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9153 vmlsl_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
9156 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
9158 : "0"(a
), "w"(b
), "w"(c
)
9159 : /* No clobbers */);
9163 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9164 vmlsl_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
9167 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
9169 : "0"(a
), "w"(b
), "w"(c
)
9170 : /* No clobbers */);
9174 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9175 vmlsl_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
9178 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
9180 : "0"(a
), "w"(b
), "w"(c
)
9181 : /* No clobbers */);
9185 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9186 vmlsl_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
9189 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
9191 : "0"(a
), "w"(b
), "w"(c
)
9192 : /* No clobbers */);
9196 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9197 vmlsl_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
9200 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
9202 : "0"(a
), "w"(b
), "w"(c
)
9203 : /* No clobbers */);
9207 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9208 vmlsl_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
9211 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
9213 : "0"(a
), "w"(b
), "w"(c
)
9214 : /* No clobbers */);
/* Multiply-subtract by lane on four float lanes: yields A - B * C[D].
   Implemented as a separate FMUL into the scratch t1 followed by FSUB
   from the accumulator (tied to the result).  D must be an integer
   constant expression.  */
#define vmlsq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       float32x4_t t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply-subtract by lane on eight signed 16-bit lanes: yields
   A - B * C[D].  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply-subtract by lane on four signed 32-bit lanes: yields
   A - B * C[D].  D must be an integer constant expression.  */
#define vmlsq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply-subtract by lane on eight unsigned 16-bit lanes: yields
   A - B * C[D].  D must be an integer constant expression.  C uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmlsq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply-subtract by lane on four unsigned 32-bit lanes: yields
   A - B * C[D].  D must be an integer constant expression.  */
#define vmlsq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply-subtract by lane of a 128-bit vector on four float lanes:
   yields __a - __b * __c[__d].  Implemented as a separate FMUL into the
   scratch __t1 followed by FSUB from the accumulator (tied to the
   result).  __d must be an integer constant expression.  */
#define vmlsq_laneq_f32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       float32x4_t __c_ = (__c); \
       float32x4_t __b_ = (__b); \
       float32x4_t __a_ = (__a); \
       float32x4_t __result; \
       float32x4_t __t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* Multiply-subtract by lane of a 128-bit vector on eight signed 16-bit
   lanes: yields __a - __b * __c[__d].  __d must be an integer constant
   expression.  __c uses the "x" constraint (V0-V15): with 16-bit elements
   the by-element encoding cannot name V16-V31.  */
#define vmlsq_laneq_s16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int16x8_t __c_ = (__c); \
       int16x8_t __b_ = (__b); \
       int16x8_t __a_ = (__a); \
       int16x8_t __result; \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* Multiply-subtract by lane of a 128-bit vector on four signed 32-bit
   lanes: yields __a - __b * __c[__d].  __d must be an integer constant
   expression.  */
#define vmlsq_laneq_s32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       int32x4_t __c_ = (__c); \
       int32x4_t __b_ = (__b); \
       int32x4_t __a_ = (__a); \
       int32x4_t __result; \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* Multiply-subtract by lane of a 128-bit vector on eight unsigned 16-bit
   lanes: yields __a - __b * __c[__d].  __d must be an integer constant
   expression.  __c uses the "x" constraint (V0-V15): with 16-bit elements
   the by-element encoding cannot name V16-V31.  */
#define vmlsq_laneq_u16(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint16x8_t __c_ = (__c); \
       uint16x8_t __b_ = (__b); \
       uint16x8_t __a_ = (__a); \
       uint16x8_t __result; \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
/* Multiply-subtract by lane of a 128-bit vector on four unsigned 32-bit
   lanes: yields __a - __b * __c[__d].  __d must be an integer constant
   expression.  */
#define vmlsq_laneq_u32(__a, __b, __c, __d) \
  __extension__ \
    ({ \
       uint32x4_t __c_ = (__c); \
       uint32x4_t __b_ = (__b); \
       uint32x4_t __a_ = (__a); \
       uint32x4_t __result; \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
                : /* No clobbers */); \
       __result; \
     })
9360 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
9361 vmlsq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
9365 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
9366 : "=w"(result
), "=w"(t1
)
9367 : "0"(a
), "w"(b
), "w"(c
)
9368 : /* No clobbers */);
9372 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
9373 vmlsq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
9377 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
9378 : "=w"(result
), "=w"(t1
)
9379 : "0"(a
), "w"(b
), "w"(c
)
9380 : /* No clobbers */);
9384 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9385 vmlsq_n_s16 (int16x8_t a
, int16x8_t b
, int16_t c
)
9388 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9390 : "0"(a
), "w"(b
), "w"(c
)
9391 : /* No clobbers */);
9395 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9396 vmlsq_n_s32 (int32x4_t a
, int32x4_t b
, int32_t c
)
9399 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9401 : "0"(a
), "w"(b
), "w"(c
)
9402 : /* No clobbers */);
9406 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9407 vmlsq_n_u16 (uint16x8_t a
, uint16x8_t b
, uint16_t c
)
9410 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9412 : "0"(a
), "w"(b
), "w"(c
)
9413 : /* No clobbers */);
9417 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9418 vmlsq_n_u32 (uint32x4_t a
, uint32x4_t b
, uint32_t c
)
9421 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9423 : "0"(a
), "w"(b
), "w"(c
)
9424 : /* No clobbers */);
9428 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
9429 vmlsq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
9432 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9434 : "0"(a
), "w"(b
), "w"(c
)
9435 : /* No clobbers */);
9439 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9440 vmlsq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
9443 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9445 : "0"(a
), "w"(b
), "w"(c
)
9446 : /* No clobbers */);
9450 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9451 vmlsq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
9454 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9456 : "0"(a
), "w"(b
), "w"(c
)
9457 : /* No clobbers */);
9461 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
9462 vmlsq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
9465 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9467 : "0"(a
), "w"(b
), "w"(c
)
9468 : /* No clobbers */);
9472 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9473 vmlsq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
9476 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9478 : "0"(a
), "w"(b
), "w"(c
)
9479 : /* No clobbers */);
9483 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9484 vmlsq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
9487 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9489 : "0"(a
), "w"(b
), "w"(c
)
9490 : /* No clobbers */);
9494 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
9495 vmov_n_f32 (float32_t a
)
9498 __asm__ ("dup %0.2s, %w1"
9501 : /* No clobbers */);
9505 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
9506 vmov_n_p8 (uint32_t a
)
9509 __asm__ ("dup %0.8b,%w1"
9512 : /* No clobbers */);
9516 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
9517 vmov_n_p16 (uint32_t a
)
9520 __asm__ ("dup %0.4h,%w1"
9523 : /* No clobbers */);
9527 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
9528 vmov_n_s8 (int32_t a
)
9531 __asm__ ("dup %0.8b,%w1"
9534 : /* No clobbers */);
9538 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
9539 vmov_n_s16 (int32_t a
)
9542 __asm__ ("dup %0.4h,%w1"
9545 : /* No clobbers */);
9549 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
9550 vmov_n_s32 (int32_t a
)
9553 __asm__ ("dup %0.2s,%w1"
9556 : /* No clobbers */);
9560 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
9561 vmov_n_s64 (int64_t a
)
9564 __asm__ ("ins %0.d[0],%x1"
9567 : /* No clobbers */);
9571 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
9572 vmov_n_u8 (uint32_t a
)
9575 __asm__ ("dup %0.8b,%w1"
9578 : /* No clobbers */);
9582 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
9583 vmov_n_u16 (uint32_t a
)
9586 __asm__ ("dup %0.4h,%w1"
9589 : /* No clobbers */);
9593 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
9594 vmov_n_u32 (uint32_t a
)
9597 __asm__ ("dup %0.2s,%w1"
9600 : /* No clobbers */);
9604 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
9605 vmov_n_u64 (uint64_t a
)
9608 __asm__ ("ins %0.d[0],%x1"
9611 : /* No clobbers */);
9615 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9616 vmovl_high_s8 (int8x16_t a
)
9619 __asm__ ("sshll2 %0.8h,%1.16b,#0"
9622 : /* No clobbers */);
9626 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9627 vmovl_high_s16 (int16x8_t a
)
9630 __asm__ ("sshll2 %0.4s,%1.8h,#0"
9633 : /* No clobbers */);
9637 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9638 vmovl_high_s32 (int32x4_t a
)
9641 __asm__ ("sshll2 %0.2d,%1.4s,#0"
9644 : /* No clobbers */);
9648 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9649 vmovl_high_u8 (uint8x16_t a
)
9652 __asm__ ("ushll2 %0.8h,%1.16b,#0"
9655 : /* No clobbers */);
9659 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9660 vmovl_high_u16 (uint16x8_t a
)
9663 __asm__ ("ushll2 %0.4s,%1.8h,#0"
9666 : /* No clobbers */);
9670 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9671 vmovl_high_u32 (uint32x4_t a
)
9674 __asm__ ("ushll2 %0.2d,%1.4s,#0"
9677 : /* No clobbers */);
9681 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9682 vmovl_s8 (int8x8_t a
)
9685 __asm__ ("sshll %0.8h,%1.8b,#0"
9688 : /* No clobbers */);
9692 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9693 vmovl_s16 (int16x4_t a
)
9696 __asm__ ("sshll %0.4s,%1.4h,#0"
9699 : /* No clobbers */);
9703 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9704 vmovl_s32 (int32x2_t a
)
9707 __asm__ ("sshll %0.2d,%1.2s,#0"
9710 : /* No clobbers */);
9714 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9715 vmovl_u8 (uint8x8_t a
)
9718 __asm__ ("ushll %0.8h,%1.8b,#0"
9721 : /* No clobbers */);
9725 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9726 vmovl_u16 (uint16x4_t a
)
9729 __asm__ ("ushll %0.4s,%1.4h,#0"
9732 : /* No clobbers */);
9736 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9737 vmovl_u32 (uint32x2_t a
)
9740 __asm__ ("ushll %0.2d,%1.2s,#0"
9743 : /* No clobbers */);
9747 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
9748 vmovn_high_s16 (int8x8_t a
, int16x8_t b
)
9750 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
9751 __asm__ ("xtn2 %0.16b,%1.8h"
9754 : /* No clobbers */);
9758 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9759 vmovn_high_s32 (int16x4_t a
, int32x4_t b
)
9761 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
9762 __asm__ ("xtn2 %0.8h,%1.4s"
9765 : /* No clobbers */);
9769 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9770 vmovn_high_s64 (int32x2_t a
, int64x2_t b
)
9772 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
9773 __asm__ ("xtn2 %0.4s,%1.2d"
9776 : /* No clobbers */);
9780 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
9781 vmovn_high_u16 (uint8x8_t a
, uint16x8_t b
)
9783 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
9784 __asm__ ("xtn2 %0.16b,%1.8h"
9787 : /* No clobbers */);
9791 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9792 vmovn_high_u32 (uint16x4_t a
, uint32x4_t b
)
9794 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
9795 __asm__ ("xtn2 %0.8h,%1.4s"
9798 : /* No clobbers */);
9802 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9803 vmovn_high_u64 (uint32x2_t a
, uint64x2_t b
)
9805 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
9806 __asm__ ("xtn2 %0.4s,%1.2d"
9809 : /* No clobbers */);
9813 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
9814 vmovn_s16 (int16x8_t a
)
9817 __asm__ ("xtn %0.8b,%1.8h"
9820 : /* No clobbers */);
9824 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
9825 vmovn_s32 (int32x4_t a
)
9828 __asm__ ("xtn %0.4h,%1.4s"
9831 : /* No clobbers */);
9835 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
9836 vmovn_s64 (int64x2_t a
)
9839 __asm__ ("xtn %0.2s,%1.2d"
9842 : /* No clobbers */);
9846 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
9847 vmovn_u16 (uint16x8_t a
)
9850 __asm__ ("xtn %0.8b,%1.8h"
9853 : /* No clobbers */);
9857 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
9858 vmovn_u32 (uint32x4_t a
)
9861 __asm__ ("xtn %0.4h,%1.4s"
9864 : /* No clobbers */);
9868 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
9869 vmovn_u64 (uint64x2_t a
)
9872 __asm__ ("xtn %0.2s,%1.2d"
9875 : /* No clobbers */);
9879 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
9880 vmovq_n_f32 (float32_t a
)
9883 __asm__ ("dup %0.4s, %w1"
9886 : /* No clobbers */);
9890 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
9891 vmovq_n_f64 (float64_t a
)
9893 return (float64x2_t
) {a
, a
};
9896 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
9897 vmovq_n_p8 (uint32_t a
)
9900 __asm__ ("dup %0.16b,%w1"
9903 : /* No clobbers */);
9907 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
9908 vmovq_n_p16 (uint32_t a
)
9911 __asm__ ("dup %0.8h,%w1"
9914 : /* No clobbers */);
9918 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
9919 vmovq_n_s8 (int32_t a
)
9922 __asm__ ("dup %0.16b,%w1"
9925 : /* No clobbers */);
9929 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9930 vmovq_n_s16 (int32_t a
)
9933 __asm__ ("dup %0.8h,%w1"
9936 : /* No clobbers */);
9940 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9941 vmovq_n_s32 (int32_t a
)
9944 __asm__ ("dup %0.4s,%w1"
9947 : /* No clobbers */);
9951 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9952 vmovq_n_s64 (int64_t a
)
9955 __asm__ ("dup %0.2d,%x1"
9958 : /* No clobbers */);
9962 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
9963 vmovq_n_u8 (uint32_t a
)
9966 __asm__ ("dup %0.16b,%w1"
9969 : /* No clobbers */);
9973 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9974 vmovq_n_u16 (uint32_t a
)
9977 __asm__ ("dup %0.8h,%w1"
9980 : /* No clobbers */);
9984 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9985 vmovq_n_u32 (uint32_t a
)
9988 __asm__ ("dup %0.4s,%w1"
9991 : /* No clobbers */);
9995 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9996 vmovq_n_u64 (uint64_t a
)
9999 __asm__ ("dup %0.2d,%x1"
10002 : /* No clobbers */);
/* Multiply by lane on two float lanes: yields A * B[C] (FMUL by element).
   C must be an integer constant expression.  */
#define vmul_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane on four signed 16-bit lanes: yields A * B[C].  C must
   be an integer constant expression.  B uses the "x" constraint (V0-V15):
   with 16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmul_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane on two signed 32-bit lanes: yields A * B[C].  C must
   be an integer constant expression.  */
#define vmul_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane on four unsigned 16-bit lanes: yields A * B[C].  C must
   be an integer constant expression.  B uses the "x" constraint (V0-V15):
   with 16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmul_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane on two unsigned 32-bit lanes: yields A * B[C].  C must
   be an integer constant expression.  */
#define vmul_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane of a 128-bit vector on two float lanes: yields
   A * B[C] (FMUL by element).  C must be an integer constant expression.  */
#define vmul_laneq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane of a 128-bit vector on four signed 16-bit lanes:
   yields A * B[C].  C must be an integer constant expression.  B uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmul_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane of a 128-bit vector on two signed 32-bit lanes:
   yields A * B[C].  C must be an integer constant expression.  */
#define vmul_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane of a 128-bit vector on four unsigned 16-bit lanes:
   yields A * B[C].  C must be an integer constant expression.  B uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmul_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Multiply by lane of a 128-bit vector on two unsigned 32-bit lanes:
   yields A * B[C].  C must be an integer constant expression.  */
#define vmul_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
10136 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
10137 vmul_n_f32 (float32x2_t a
, float32_t b
)
10139 float32x2_t result
;
10140 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
10143 : /* No clobbers */);
10147 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
10148 vmul_n_s16 (int16x4_t a
, int16_t b
)
10151 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10154 : /* No clobbers */);
10158 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
10159 vmul_n_s32 (int32x2_t a
, int32_t b
)
10162 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10165 : /* No clobbers */);
10169 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
10170 vmul_n_u16 (uint16x4_t a
, uint16_t b
)
10173 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10176 : /* No clobbers */);
10180 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
10181 vmul_n_u32 (uint32x2_t a
, uint32_t b
)
10184 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10187 : /* No clobbers */);
/* Scalar double-precision multiply by lane: yields A * B[C], where A is a
   scalar held in a D register and B is a two-lane vector.  C must be an
   integer constant expression.  */
#define vmuld_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64_t result; \
       __asm__ ("fmul %d0,%d1,%2.d[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply by lane (SMULL2): multiplies the upper four
   16-bit lanes of A by lane C of B, giving 32-bit results.  C must be an
   integer constant expression.  B uses the "x" constraint (V0-V15): with
   16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmull_high_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply by lane (SMULL2): multiplies the upper two
   32-bit lanes of A by lane C of B, giving 64-bit results.  C must be an
   integer constant expression.  */
#define vmull_high_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply by lane (UMULL2): multiplies the upper four
   16-bit lanes of A by lane C of B, giving 32-bit results.  C must be an
   integer constant expression.  B uses the "x" constraint (V0-V15): with
   16-bit elements the by-element encoding cannot name V16-V31.  */
#define vmull_high_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply by lane (UMULL2): multiplies the upper two
   32-bit lanes of A by lane C of B, giving 64-bit results.  C must be an
   integer constant expression.  */
#define vmull_high_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply by lane of a 128-bit vector (SMULL2):
   multiplies the upper four 16-bit lanes of A by lane C of B, giving
   32-bit results.  C must be an integer constant expression.  B uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmull_high_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Signed widening multiply by lane of a 128-bit vector (SMULL2):
   multiplies the upper two 32-bit lanes of A by lane C of B, giving
   64-bit results.  C must be an integer constant expression.  */
#define vmull_high_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply by lane of a 128-bit vector (UMULL2):
   multiplies the upper four 16-bit lanes of A by lane C of B, giving
   32-bit results.  C must be an integer constant expression.  B uses the
   "x" constraint (V0-V15): with 16-bit elements the by-element encoding
   cannot name V16-V31.  */
#define vmull_high_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Unsigned widening multiply by lane of a 128-bit vector (UMULL2):
   multiplies the upper two 32-bit lanes of A by lane C of B, giving
   64-bit results.  C must be an integer constant expression.  */
#define vmull_high_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmull_high_n_s16: int16x8_t a, scalar int16_t b -> int32x4_t via one SMULL2
   against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10308 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10309 vmull_high_n_s16 (int16x8_t a
, int16_t b
)
10312 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
10315 : /* No clobbers */);
/* vmull_high_n_s32: int32x4_t a, scalar int32_t b -> int64x2_t via one SMULL2
   against element %2.s[0]; no clobbers declared.  */
10319 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10320 vmull_high_n_s32 (int32x4_t a
, int32_t b
)
10323 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
10326 : /* No clobbers */);
/* vmull_high_n_u16: uint16x8_t a, scalar uint16_t b -> uint32x4_t via one
   UMULL2 against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10330 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10331 vmull_high_n_u16 (uint16x8_t a
, uint16_t b
)
10334 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
10337 : /* No clobbers */);
/* vmull_high_n_u32: uint32x4_t a, scalar uint32_t b -> uint64x2_t via one
   UMULL2 against element %2.s[0]; no clobbers declared.  */
10341 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10342 vmull_high_n_u32 (uint32x4_t a
, uint32_t b
)
10345 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
10348 : /* No clobbers */);
/* vmull_high_p8: poly8x16_t a, b -> poly16x8_t via one PMULL2 instruction;
   no clobbers declared.  */
10352 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
10353 vmull_high_p8 (poly8x16_t a
, poly8x16_t b
)
10356 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
10359 : /* No clobbers */);
/* vmull_high_s8: int8x16_t a, b -> int16x8_t via one SMULL2 instruction;
   no clobbers declared.  */
10363 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10364 vmull_high_s8 (int8x16_t a
, int8x16_t b
)
10367 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
10370 : /* No clobbers */);
/* vmull_high_s16: int16x8_t a, b -> int32x4_t via one SMULL2 instruction;
   no clobbers declared.  */
10374 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10375 vmull_high_s16 (int16x8_t a
, int16x8_t b
)
10378 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
10381 : /* No clobbers */);
/* vmull_high_s32: int32x4_t a, b -> int64x2_t via one SMULL2 instruction;
   no clobbers declared.  */
10385 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10386 vmull_high_s32 (int32x4_t a
, int32x4_t b
)
10389 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
10392 : /* No clobbers */);
/* vmull_high_u8: uint8x16_t a, b -> uint16x8_t via one UMULL2 instruction;
   no clobbers declared.  */
10396 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10397 vmull_high_u8 (uint8x16_t a
, uint8x16_t b
)
10400 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
10403 : /* No clobbers */);
/* vmull_high_u16: uint16x8_t a, b -> uint32x4_t via one UMULL2 instruction;
   no clobbers declared.  */
10407 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10408 vmull_high_u16 (uint16x8_t a
, uint16x8_t b
)
10411 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
10414 : /* No clobbers */);
/* vmull_high_u32: uint32x4_t a, b -> uint64x2_t via one UMULL2 instruction;
   no clobbers declared.  */
10418 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10419 vmull_high_u32 (uint32x4_t a
, uint32x4_t b
)
10422 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
10425 : /* No clobbers */);
/* vmull_lane_s16 (a, b, c): SMULL by element, int16x4_t operands, int32x4_t
   result.  C is passed as an immediate ("i") and selects the lane of B.
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15, so "w" could hand the assembler
   an unencodable register.  */
#define vmull_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmull_lane_s32 (a, b, c): SMULL by element (%2.s[%3]) on int32x2_t
   operands, int64x2_t result; c is passed as an immediate ("i").  */
10442 #define vmull_lane_s32(a, b, c) \
10445 int32x2_t b_ = (b); \
10446 int32x2_t a_ = (a); \
10447 int64x2_t result; \
10448 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
10450 : "w"(a_), "w"(b_), "i"(c) \
10451 : /* No clobbers */); \
/* vmull_lane_u16 (a, b, c): UMULL by element, uint16x4_t operands,
   uint32x4_t result.  C is passed as an immediate ("i") and selects the lane.
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmull_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmull_lane_u32 (a, b, c): UMULL by element (%2.s[%3]) on uint32x2_t
   operands, uint64x2_t result; c is passed as an immediate ("i").  */
10468 #define vmull_lane_u32(a, b, c) \
10471 uint32x2_t b_ = (b); \
10472 uint32x2_t a_ = (a); \
10473 uint64x2_t result; \
10474 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
10476 : "w"(a_), "w"(b_), "i"(c) \
10477 : /* No clobbers */); \
/* vmull_laneq_s16 (a, b, c): SMULL by element; A is int16x4_t, B is
   int16x8_t, result is int32x4_t.  C is passed as an immediate ("i").
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmull_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmull_laneq_s32 (a, b, c): SMULL by element (%2.s[%3]); a is int32x2_t,
   b is int32x4_t, result int64x2_t; c is passed as an immediate ("i").  */
10494 #define vmull_laneq_s32(a, b, c) \
10497 int32x4_t b_ = (b); \
10498 int32x2_t a_ = (a); \
10499 int64x2_t result; \
10500 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
10502 : "w"(a_), "w"(b_), "i"(c) \
10503 : /* No clobbers */); \
/* vmull_laneq_u16 (a, b, c): UMULL by element; A is uint16x4_t, B is
   uint16x8_t, result is uint32x4_t.  C is passed as an immediate ("i").
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmull_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmull_laneq_u32 (a, b, c): UMULL by element (%2.s[%3]); a is uint32x2_t,
   b is uint32x4_t, result uint64x2_t; c is passed as an immediate ("i").  */
10520 #define vmull_laneq_u32(a, b, c) \
10523 uint32x4_t b_ = (b); \
10524 uint32x2_t a_ = (a); \
10525 uint64x2_t result; \
10526 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
10528 : "w"(a_), "w"(b_), "i"(c) \
10529 : /* No clobbers */); \
/* vmull_n_s16: int16x4_t a, scalar int16_t b -> int32x4_t via one SMULL
   against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10533 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10534 vmull_n_s16 (int16x4_t a
, int16_t b
)
10537 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
10540 : /* No clobbers */);
/* vmull_n_s32: int32x2_t a, scalar int32_t b -> int64x2_t via one SMULL
   against element %2.s[0]; no clobbers declared.  */
10544 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10545 vmull_n_s32 (int32x2_t a
, int32_t b
)
10548 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
10551 : /* No clobbers */);
/* vmull_n_u16: uint16x4_t a, scalar uint16_t b -> uint32x4_t via one UMULL
   against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10555 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10556 vmull_n_u16 (uint16x4_t a
, uint16_t b
)
10559 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
10562 : /* No clobbers */);
/* vmull_n_u32: uint32x2_t a, scalar uint32_t b -> uint64x2_t via one UMULL
   against element %2.s[0]; no clobbers declared.  */
10566 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10567 vmull_n_u32 (uint32x2_t a
, uint32_t b
)
10570 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
10573 : /* No clobbers */);
/* vmull_p8: poly8x8_t a, b -> poly16x8_t via one PMULL instruction;
   no clobbers declared.  */
10577 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
10578 vmull_p8 (poly8x8_t a
, poly8x8_t b
)
10581 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
10584 : /* No clobbers */);
/* vmull_s8: int8x8_t a, b -> int16x8_t via one SMULL instruction;
   no clobbers declared.  */
10588 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10589 vmull_s8 (int8x8_t a
, int8x8_t b
)
10592 __asm__ ("smull %0.8h, %1.8b, %2.8b"
10595 : /* No clobbers */);
/* vmull_s16: int16x4_t a, b -> int32x4_t via one SMULL instruction;
   no clobbers declared.  */
10599 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10600 vmull_s16 (int16x4_t a
, int16x4_t b
)
10603 __asm__ ("smull %0.4s, %1.4h, %2.4h"
10606 : /* No clobbers */);
/* vmull_s32: int32x2_t a, b -> int64x2_t via one SMULL instruction;
   no clobbers declared.  */
10610 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10611 vmull_s32 (int32x2_t a
, int32x2_t b
)
10614 __asm__ ("smull %0.2d, %1.2s, %2.2s"
10617 : /* No clobbers */);
/* vmull_u8: uint8x8_t a, b -> uint16x8_t via one UMULL instruction;
   no clobbers declared.  */
10621 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10622 vmull_u8 (uint8x8_t a
, uint8x8_t b
)
10625 __asm__ ("umull %0.8h, %1.8b, %2.8b"
10628 : /* No clobbers */);
/* vmull_u16: uint16x4_t a, b -> uint32x4_t via one UMULL instruction;
   no clobbers declared.  */
10632 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10633 vmull_u16 (uint16x4_t a
, uint16x4_t b
)
10636 __asm__ ("umull %0.4s, %1.4h, %2.4h"
10639 : /* No clobbers */);
/* vmull_u32: uint32x2_t a, b -> uint64x2_t via one UMULL instruction;
   no clobbers declared.  */
10643 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10644 vmull_u32 (uint32x2_t a
, uint32x2_t b
)
10647 __asm__ ("umull %0.2d, %1.2s, %2.2s"
10650 : /* No clobbers */);
/* vmulq_lane_f32 (a, b, c): FMUL by element (%2.s[%3]); a is float32x4_t,
   b is float32x2_t, result float32x4_t; c is passed as an immediate ("i").  */
10654 #define vmulq_lane_f32(a, b, c) \
10657 float32x2_t b_ = (b); \
10658 float32x4_t a_ = (a); \
10659 float32x4_t result; \
10660 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
10662 : "w"(a_), "w"(b_), "i"(c) \
10663 : /* No clobbers */); \
/* vmulq_lane_f64 (a, b, c): FMUL by element (%2.d[%3]); a is float64x2_t,
   b is float64x1_t, result float64x2_t; c is passed as an immediate ("i").  */
10667 #define vmulq_lane_f64(a, b, c) \
10670 float64x1_t b_ = (b); \
10671 float64x2_t a_ = (a); \
10672 float64x2_t result; \
10673 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
10675 : "w"(a_), "w"(b_), "i"(c) \
10676 : /* No clobbers */); \
/* vmulq_lane_s16 (a, b, c): MUL by element; A is int16x8_t, B is int16x4_t,
   result is int16x8_t.  C is passed as an immediate ("i").
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15, so "w" could hand the assembler
   an unencodable register.  */
#define vmulq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmulq_lane_s32 (a, b, c): MUL by element (%2.s[%3]); a is int32x4_t,
   b is int32x2_t, result int32x4_t; c is passed as an immediate ("i").  */
10693 #define vmulq_lane_s32(a, b, c) \
10696 int32x2_t b_ = (b); \
10697 int32x4_t a_ = (a); \
10698 int32x4_t result; \
10699 __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \
10701 : "w"(a_), "w"(b_), "i"(c) \
10702 : /* No clobbers */); \
/* vmulq_lane_u16 (a, b, c): MUL by element; A is uint16x8_t, B is
   uint16x4_t, result is uint16x8_t.  C is passed as an immediate ("i").
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmulq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmulq_lane_u32 (a, b, c): MUL by element (%2.s[%3]); a is uint32x4_t,
   b is uint32x2_t, result uint32x4_t; c is passed as an immediate ("i").  */
10719 #define vmulq_lane_u32(a, b, c) \
10722 uint32x2_t b_ = (b); \
10723 uint32x4_t a_ = (a); \
10724 uint32x4_t result; \
10725 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
10727 : "w"(a_), "w"(b_), "i"(c) \
10728 : /* No clobbers */); \
/* vmulq_laneq_f32 (a, b, c): FMUL by element (%2.s[%3]) on float32x4_t
   operands and result; c is passed as an immediate ("i").  */
10732 #define vmulq_laneq_f32(a, b, c) \
10735 float32x4_t b_ = (b); \
10736 float32x4_t a_ = (a); \
10737 float32x4_t result; \
10738 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
10740 : "w"(a_), "w"(b_), "i"(c) \
10741 : /* No clobbers */); \
/* vmulq_laneq_f64 (a, b, c): FMUL by element (%2.d[%3]) on float64x2_t
   operands and result; c is passed as an immediate ("i").  */
10745 #define vmulq_laneq_f64(a, b, c) \
10748 float64x2_t b_ = (b); \
10749 float64x2_t a_ = (a); \
10750 float64x2_t result; \
10751 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
10753 : "w"(a_), "w"(b_), "i"(c) \
10754 : /* No clobbers */); \
/* vmulq_laneq_s16 (a, b, c): MUL by element on int16x8_t operands and
   result.  C is passed as an immediate ("i") and selects the lane of B.
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmulq_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmulq_laneq_s32 (a, b, c): MUL by element (%2.s[%3]) on int32x4_t
   operands and result; c is passed as an immediate ("i").  */
10771 #define vmulq_laneq_s32(a, b, c) \
10774 int32x4_t b_ = (b); \
10775 int32x4_t a_ = (a); \
10776 int32x4_t result; \
10777 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
10779 : "w"(a_), "w"(b_), "i"(c) \
10780 : /* No clobbers */); \
/* vmulq_laneq_u16 (a, b, c): MUL by element on uint16x8_t operands and
   result.  C is passed as an immediate ("i") and selects the lane of B.
   The indexed register uses the "x" constraint: with 16-bit elements the
   by-element encoding can only name V0-V15.  */
#define vmulq_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "x"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vmulq_laneq_u32 (a, b, c): MUL by element (%2.s[%3]) on uint32x4_t
   operands and result; c is passed as an immediate ("i").  */
10797 #define vmulq_laneq_u32(a, b, c) \
10800 uint32x4_t b_ = (b); \
10801 uint32x4_t a_ = (a); \
10802 uint32x4_t result; \
10803 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
10805 : "w"(a_), "w"(b_), "i"(c) \
10806 : /* No clobbers */); \
/* vmulq_n_f32: float32x4_t a, scalar float32_t b -> float32x4_t via one FMUL
   against element %2.s[0]; no clobbers declared.  */
10810 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
10811 vmulq_n_f32 (float32x4_t a
, float32_t b
)
10813 float32x4_t result
;
10814 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
10817 : /* No clobbers */);
/* vmulq_n_f64: float64x2_t a, scalar float64_t b -> float64x2_t via one FMUL
   against element %2.d[0]; no clobbers declared.  */
10821 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
10822 vmulq_n_f64 (float64x2_t a
, float64_t b
)
10824 float64x2_t result
;
10825 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
10828 : /* No clobbers */);
/* vmulq_n_s16: int16x8_t a, scalar int16_t b -> int16x8_t via one MUL
   against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10832 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10833 vmulq_n_s16 (int16x8_t a
, int16_t b
)
10836 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10839 : /* No clobbers */);
/* vmulq_n_s32: int32x4_t a, scalar int32_t b -> int32x4_t via one MUL
   against element %2.s[0]; no clobbers declared.  */
10843 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10844 vmulq_n_s32 (int32x4_t a
, int32_t b
)
10847 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10850 : /* No clobbers */);
/* vmulq_n_u16: uint16x8_t a, scalar uint16_t b -> uint16x8_t via one MUL
   against element %2.h[0]; no clobbers declared.  NOTE(review): 16-bit
   by-element operand -- confirm b's constraint restricts it to V0-V15.  */
10854 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10855 vmulq_n_u16 (uint16x8_t a
, uint16_t b
)
10858 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10861 : /* No clobbers */);
/* vmulq_n_u32: uint32x4_t a, scalar uint32_t b -> uint32x4_t via one MUL
   against element %2.s[0]; no clobbers declared.  */
10865 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10866 vmulq_n_u32 (uint32x4_t a
, uint32_t b
)
10869 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10872 : /* No clobbers */);
/* vmuls_lane_f32 (a, b, c): scalar FMUL (%s0, %s1) by element %2.s[%3];
   a and the result are float32_t, c is passed as an immediate ("i").
   NOTE(review): b_ is a 128-bit float32x4_t in this non-q `lane' form --
   confirm the intended width against ACLE.  */
10876 #define vmuls_lane_f32(a, b, c) \
10879 float32x4_t b_ = (b); \
10880 float32_t a_ = (a); \
10881 float32_t result; \
10882 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
10884 : "w"(a_), "w"(b_), "i"(c) \
10885 : /* No clobbers */); \
/* vmulx_f32: float32x2_t a, b -> float32x2_t via one FMULX instruction;
   no clobbers declared.  */
10889 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
10890 vmulx_f32 (float32x2_t a
, float32x2_t b
)
10892 float32x2_t result
;
10893 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
10896 : /* No clobbers */);
/* vmulx_lane_f32 (a, b, c): FMULX by element (%2.s[%3]); a and the result
   are float32x2_t, c is passed as an immediate ("i").
   NOTE(review): b_ is a 128-bit float32x4_t in this non-q `lane' form --
   confirm the intended width against ACLE.  */
10900 #define vmulx_lane_f32(a, b, c) \
10903 float32x4_t b_ = (b); \
10904 float32x2_t a_ = (a); \
10905 float32x2_t result; \
10906 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
10908 : "w"(a_), "w"(b_), "i"(c) \
10909 : /* No clobbers */); \
/* vmulxd_f64: scalar float64_t a, b -> float64_t via one FMULX on D
   registers (%d0, %d1, %d2); no clobbers declared.  */
10913 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
10914 vmulxd_f64 (float64_t a
, float64_t b
)
10917 __asm__ ("fmulx %d0, %d1, %d2"
10920 : /* No clobbers */);
/* vmulxq_f32: float32x4_t a, b -> float32x4_t via one FMULX instruction;
   no clobbers declared.  */
10924 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
10925 vmulxq_f32 (float32x4_t a
, float32x4_t b
)
10927 float32x4_t result
;
10928 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
10931 : /* No clobbers */);
/* vmulxq_f64: float64x2_t a, b -> float64x2_t via one FMULX instruction;
   no clobbers declared.  */
10935 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
10936 vmulxq_f64 (float64x2_t a
, float64x2_t b
)
10938 float64x2_t result
;
10939 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
10942 : /* No clobbers */);
/* vmulxq_lane_f32 (a, b, c): FMULX by element (%2.s[%3]) with float32x4_t
   a and result; c is passed as an immediate ("i").
   NOTE(review): b_ is a 128-bit float32x4_t in this non-q `lane' form --
   confirm the intended width against ACLE.  */
10946 #define vmulxq_lane_f32(a, b, c) \
10949 float32x4_t b_ = (b); \
10950 float32x4_t a_ = (a); \
10951 float32x4_t result; \
10952 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
10954 : "w"(a_), "w"(b_), "i"(c) \
10955 : /* No clobbers */); \
/* vmulxq_lane_f64 (a, b, c): FMULX by element (%2.d[%3]) with float64x2_t
   a and result; c is passed as an immediate ("i").
   NOTE(review): b_ is a 128-bit float64x2_t in this non-q `lane' form --
   confirm the intended width against ACLE.  */
10959 #define vmulxq_lane_f64(a, b, c) \
10962 float64x2_t b_ = (b); \
10963 float64x2_t a_ = (a); \
10964 float64x2_t result; \
10965 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
10967 : "w"(a_), "w"(b_), "i"(c) \
10968 : /* No clobbers */); \
/* vmulxs_f32: scalar float32_t a, b -> float32_t via one FMULX on S
   registers (%s0, %s1, %s2); no clobbers declared.  */
10972 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
10973 vmulxs_f32 (float32_t a
, float32_t b
)
10976 __asm__ ("fmulx %s0, %s1, %s2"
10979 : /* No clobbers */);
/* vmvn_p8: poly8x8_t a -> poly8x8_t via one MVN on the .8b arrangement;
   no clobbers declared.  */
10983 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
10984 vmvn_p8 (poly8x8_t a
)
10987 __asm__ ("mvn %0.8b,%1.8b"
10990 : /* No clobbers */);
/* vmvn_s8: int8x8_t a -> int8x8_t via one MVN on the .8b arrangement;
   no clobbers declared.  */
10994 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
10995 vmvn_s8 (int8x8_t a
)
10998 __asm__ ("mvn %0.8b,%1.8b"
11001 : /* No clobbers */);
/* vmvn_s16: int16x4_t a -> int16x4_t via one MVN; the template uses the
   byte (.8b) arrangement regardless of element width.  */
11005 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11006 vmvn_s16 (int16x4_t a
)
11009 __asm__ ("mvn %0.8b,%1.8b"
11012 : /* No clobbers */);
/* vmvn_s32: int32x2_t a -> int32x2_t via one MVN; the template uses the
   byte (.8b) arrangement regardless of element width.  */
11016 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11017 vmvn_s32 (int32x2_t a
)
11020 __asm__ ("mvn %0.8b,%1.8b"
11023 : /* No clobbers */);
/* vmvn_u8: uint8x8_t a -> uint8x8_t via one MVN on the .8b arrangement;
   no clobbers declared.  */
11027 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11028 vmvn_u8 (uint8x8_t a
)
11031 __asm__ ("mvn %0.8b,%1.8b"
11034 : /* No clobbers */);
/* vmvn_u16: uint16x4_t a -> uint16x4_t via one MVN; the template uses the
   byte (.8b) arrangement regardless of element width.  */
11038 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11039 vmvn_u16 (uint16x4_t a
)
11042 __asm__ ("mvn %0.8b,%1.8b"
11045 : /* No clobbers */);
/* vmvn_u32: uint32x2_t a -> uint32x2_t via one MVN; the template uses the
   byte (.8b) arrangement regardless of element width.  */
11049 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11050 vmvn_u32 (uint32x2_t a
)
11053 __asm__ ("mvn %0.8b,%1.8b"
11056 : /* No clobbers */);
/* vmvnq_p8: poly8x16_t a -> poly8x16_t via one MVN on the .16b arrangement;
   no clobbers declared.  */
11060 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
11061 vmvnq_p8 (poly8x16_t a
)
11064 __asm__ ("mvn %0.16b,%1.16b"
11067 : /* No clobbers */);
/* vmvnq_s8: int8x16_t a -> int8x16_t via one MVN on the .16b arrangement;
   no clobbers declared.  */
11071 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11072 vmvnq_s8 (int8x16_t a
)
11075 __asm__ ("mvn %0.16b,%1.16b"
11078 : /* No clobbers */);
/* vmvnq_s16: int16x8_t a -> int16x8_t via one MVN; the template uses the
   byte (.16b) arrangement regardless of element width.  */
11082 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11083 vmvnq_s16 (int16x8_t a
)
11086 __asm__ ("mvn %0.16b,%1.16b"
11089 : /* No clobbers */);
/* vmvnq_s32: int32x4_t a -> int32x4_t via one MVN; the template uses the
   byte (.16b) arrangement regardless of element width.  */
11093 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11094 vmvnq_s32 (int32x4_t a
)
11097 __asm__ ("mvn %0.16b,%1.16b"
11100 : /* No clobbers */);
/* vmvnq_u8: uint8x16_t a -> uint8x16_t via one MVN on the .16b arrangement;
   no clobbers declared.  */
11104 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11105 vmvnq_u8 (uint8x16_t a
)
11108 __asm__ ("mvn %0.16b,%1.16b"
11111 : /* No clobbers */);
/* vmvnq_u16: uint16x8_t a -> uint16x8_t via one MVN; the template uses the
   byte (.16b) arrangement regardless of element width.  */
11115 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11116 vmvnq_u16 (uint16x8_t a
)
11119 __asm__ ("mvn %0.16b,%1.16b"
11122 : /* No clobbers */);
/* vmvnq_u32: uint32x4_t a -> uint32x4_t via one MVN; the template uses the
   byte (.16b) arrangement regardless of element width.  */
11126 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11127 vmvnq_u32 (uint32x4_t a
)
11130 __asm__ ("mvn %0.16b,%1.16b"
11133 : /* No clobbers */);
/* vneg_f32: float32x2_t a -> float32x2_t via one FNEG instruction;
   no clobbers declared.  */
11137 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11138 vneg_f32 (float32x2_t a
)
11140 float32x2_t result
;
11141 __asm__ ("fneg %0.2s,%1.2s"
11144 : /* No clobbers */);
/* vneg_s8: int8x8_t a -> int8x8_t via one NEG instruction;
   no clobbers declared.  */
11148 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11149 vneg_s8 (int8x8_t a
)
11152 __asm__ ("neg %0.8b,%1.8b"
11155 : /* No clobbers */);
/* vneg_s16: int16x4_t a -> int16x4_t via one NEG instruction;
   no clobbers declared.  */
11159 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11160 vneg_s16 (int16x4_t a
)
11163 __asm__ ("neg %0.4h,%1.4h"
11166 : /* No clobbers */);
/* vneg_s32: int32x2_t a -> int32x2_t via one NEG instruction;
   no clobbers declared.  */
11170 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11171 vneg_s32 (int32x2_t a
)
11174 __asm__ ("neg %0.2s,%1.2s"
11177 : /* No clobbers */);
/* vnegq_f32: float32x4_t a -> float32x4_t via one FNEG instruction;
   no clobbers declared.  */
11181 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11182 vnegq_f32 (float32x4_t a
)
11184 float32x4_t result
;
11185 __asm__ ("fneg %0.4s,%1.4s"
11188 : /* No clobbers */);
/* vnegq_f64: float64x2_t a -> float64x2_t via one FNEG instruction;
   no clobbers declared.  */
11192 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11193 vnegq_f64 (float64x2_t a
)
11195 float64x2_t result
;
11196 __asm__ ("fneg %0.2d,%1.2d"
11199 : /* No clobbers */);
/* vnegq_s8: int8x16_t a -> int8x16_t via one NEG instruction;
   no clobbers declared.  */
11203 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11204 vnegq_s8 (int8x16_t a
)
11207 __asm__ ("neg %0.16b,%1.16b"
11210 : /* No clobbers */);
/* vnegq_s16: int16x8_t a -> int16x8_t via one NEG instruction;
   no clobbers declared.  */
11214 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11215 vnegq_s16 (int16x8_t a
)
11218 __asm__ ("neg %0.8h,%1.8h"
11221 : /* No clobbers */);
/* vnegq_s32: int32x4_t a -> int32x4_t via one NEG instruction;
   no clobbers declared.  */
11225 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11226 vnegq_s32 (int32x4_t a
)
11229 __asm__ ("neg %0.4s,%1.4s"
11232 : /* No clobbers */);
/* vnegq_s64: int64x2_t a -> int64x2_t via one NEG instruction;
   no clobbers declared.  */
11236 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11237 vnegq_s64 (int64x2_t a
)
11240 __asm__ ("neg %0.2d,%1.2d"
11243 : /* No clobbers */);
/* vpadal_s8: int16x4_t a, int8x8_t b -> int16x4_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11247 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11248 vpadal_s8 (int16x4_t a
, int8x8_t b
)
11251 __asm__ ("sadalp %0.4h,%2.8b"
11254 : /* No clobbers */);
/* vpadal_s16: int32x2_t a, int16x4_t b -> int32x2_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11258 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11259 vpadal_s16 (int32x2_t a
, int16x4_t b
)
11262 __asm__ ("sadalp %0.2s,%2.4h"
11265 : /* No clobbers */);
/* vpadal_s32: int64x1_t a, int32x2_t b -> int64x1_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11269 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
11270 vpadal_s32 (int64x1_t a
, int32x2_t b
)
11273 __asm__ ("sadalp %0.1d,%2.2s"
11276 : /* No clobbers */);
/* vpadal_u8: uint16x4_t a, uint8x8_t b -> uint16x4_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11280 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11281 vpadal_u8 (uint16x4_t a
, uint8x8_t b
)
11284 __asm__ ("uadalp %0.4h,%2.8b"
11287 : /* No clobbers */);
/* vpadal_u16: uint32x2_t a, uint16x4_t b -> uint32x2_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11291 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11292 vpadal_u16 (uint32x2_t a
, uint16x4_t b
)
11295 __asm__ ("uadalp %0.2s,%2.4h"
11298 : /* No clobbers */);
/* vpadal_u32: uint64x1_t a, uint32x2_t b -> uint64x1_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11302 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
11303 vpadal_u32 (uint64x1_t a
, uint32x2_t b
)
11306 __asm__ ("uadalp %0.1d,%2.2s"
11309 : /* No clobbers */);
/* vpadalq_s8: int16x8_t a, int8x16_t b -> int16x8_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11313 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11314 vpadalq_s8 (int16x8_t a
, int8x16_t b
)
11317 __asm__ ("sadalp %0.8h,%2.16b"
11320 : /* No clobbers */);
/* vpadalq_s16: int32x4_t a, int16x8_t b -> int32x4_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11324 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11325 vpadalq_s16 (int32x4_t a
, int16x8_t b
)
11328 __asm__ ("sadalp %0.4s,%2.8h"
11331 : /* No clobbers */);
/* vpadalq_s32: int64x2_t a, int32x4_t b -> int64x2_t via one SADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11335 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11336 vpadalq_s32 (int64x2_t a
, int32x4_t b
)
11339 __asm__ ("sadalp %0.2d,%2.4s"
11342 : /* No clobbers */);
/* vpadalq_u8: uint16x8_t a, uint8x16_t b -> uint16x8_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11346 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11347 vpadalq_u8 (uint16x8_t a
, uint8x16_t b
)
11350 __asm__ ("uadalp %0.8h,%2.16b"
11353 : /* No clobbers */);
/* vpadalq_u16: uint32x4_t a, uint16x8_t b -> uint32x4_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11357 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11358 vpadalq_u16 (uint32x4_t a
, uint16x8_t b
)
11361 __asm__ ("uadalp %0.4s,%2.8h"
11364 : /* No clobbers */);
/* vpadalq_u32: uint64x2_t a, uint32x4_t b -> uint64x2_t via one UADALP.  The
   template names only %0 and %2; presumably `a' is tied to the output as
   operand 1 -- confirm against the operand list.  */
11368 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11369 vpadalq_u32 (uint64x2_t a
, uint32x4_t b
)
11372 __asm__ ("uadalp %0.2d,%2.4s"
11375 : /* No clobbers */);
/* vpadd_f32: float32x2_t a, b -> float32x2_t via one FADDP instruction;
   no clobbers declared.  */
11379 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11380 vpadd_f32 (float32x2_t a
, float32x2_t b
)
11382 float32x2_t result
;
11383 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
11386 : /* No clobbers */);
/* vpadd_s8: forwards both int8x8_t arguments to __builtin_aarch64_addpv8qi
   and returns its result.  */
11390 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11391 vpadd_s8 (int8x8_t __a
, int8x8_t __b
)
11393 return __builtin_aarch64_addpv8qi (__a
, __b
);
/* vpadd_s16: forwards both int16x4_t arguments to __builtin_aarch64_addpv4hi
   and returns its result.  */
11396 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11397 vpadd_s16 (int16x4_t __a
, int16x4_t __b
)
11399 return __builtin_aarch64_addpv4hi (__a
, __b
);
/* vpadd_s32: forwards both int32x2_t arguments to __builtin_aarch64_addpv2si
   and returns its result.  */
11402 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11403 vpadd_s32 (int32x2_t __a
, int32x2_t __b
)
11405 return __builtin_aarch64_addpv2si (__a
, __b
);
/* vpadd_u8: unsigned variant built on the signed builtin; __a is cast to
   int8x8_t for __builtin_aarch64_addpv8qi and the result is cast back to
   uint8x8_t.  */
11408 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11409 vpadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
11411 return (uint8x8_t
) __builtin_aarch64_addpv8qi ((int8x8_t
) __a
,
/* vpadd_u16: unsigned variant built on the signed builtin; __a is cast to
   int16x4_t for __builtin_aarch64_addpv4hi and the result is cast back to
   uint16x4_t.  */
11415 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11416 vpadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
11418 return (uint16x4_t
) __builtin_aarch64_addpv4hi ((int16x4_t
) __a
,
/* vpadd_u32: unsigned variant built on the signed builtin; __a is cast to
   int32x2_t for __builtin_aarch64_addpv2si and the result is cast back to
   uint32x2_t.  */
11422 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11423 vpadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
11425 return (uint32x2_t
) __builtin_aarch64_addpv2si ((int32x2_t
) __a
,
/* vpaddd_f64: float64x2_t a -> scalar float64_t via one FADDP writing a
   D register (%d0); no clobbers declared.  */
11429 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
11430 vpaddd_f64 (float64x2_t a
)
11433 __asm__ ("faddp %d0,%1.2d"
11436 : /* No clobbers */);
/* vpaddl_s8: int8x8_t a -> int16x4_t via one SADDLP instruction;
   no clobbers declared.  */
11440 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11441 vpaddl_s8 (int8x8_t a
)
11444 __asm__ ("saddlp %0.4h,%1.8b"
11447 : /* No clobbers */);
/* vpaddl_s16: int16x4_t a -> int32x2_t via one SADDLP instruction;
   no clobbers declared.  */
11451 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11452 vpaddl_s16 (int16x4_t a
)
11455 __asm__ ("saddlp %0.2s,%1.4h"
11458 : /* No clobbers */);
/* vpaddl_s32: int32x2_t a -> int64x1_t via one SADDLP instruction;
   no clobbers declared.  */
11462 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
11463 vpaddl_s32 (int32x2_t a
)
11466 __asm__ ("saddlp %0.1d,%1.2s"
11469 : /* No clobbers */);
/* vpaddl_u8: uint8x8_t a -> uint16x4_t via one UADDLP instruction;
   no clobbers declared.  */
11473 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11474 vpaddl_u8 (uint8x8_t a
)
11477 __asm__ ("uaddlp %0.4h,%1.8b"
11480 : /* No clobbers */);
/* vpaddl_u16: uint16x4_t a -> uint32x2_t via one UADDLP instruction;
   no clobbers declared.  */
11484 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11485 vpaddl_u16 (uint16x4_t a
)
11488 __asm__ ("uaddlp %0.2s,%1.4h"
11491 : /* No clobbers */);
/* vpaddl_u32: uint32x2_t a -> uint64x1_t via one UADDLP instruction;
   no clobbers declared.  */
11495 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
11496 vpaddl_u32 (uint32x2_t a
)
11499 __asm__ ("uaddlp %0.1d,%1.2s"
11502 : /* No clobbers */);
/* vpaddlq_s8: int8x16_t a -> int16x8_t via one SADDLP instruction;
   no clobbers declared.  */
11506 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11507 vpaddlq_s8 (int8x16_t a
)
11510 __asm__ ("saddlp %0.8h,%1.16b"
11513 : /* No clobbers */);
/* vpaddlq_s16: int16x8_t a -> int32x4_t via one SADDLP instruction;
   no clobbers declared.  */
11517 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11518 vpaddlq_s16 (int16x8_t a
)
11521 __asm__ ("saddlp %0.4s,%1.8h"
11524 : /* No clobbers */);
/* vpaddlq_s32: int32x4_t a -> int64x2_t via one SADDLP instruction;
   no clobbers declared.  */
11528 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11529 vpaddlq_s32 (int32x4_t a
)
11532 __asm__ ("saddlp %0.2d,%1.4s"
11535 : /* No clobbers */);
/* vpaddlq_u8: uint8x16_t a -> uint16x8_t via one UADDLP instruction;
   no clobbers declared.  */
11539 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11540 vpaddlq_u8 (uint8x16_t a
)
11543 __asm__ ("uaddlp %0.8h,%1.16b"
11546 : /* No clobbers */);
/* vpaddlq_u16: uint16x8_t a -> uint32x4_t via one UADDLP instruction;
   no clobbers declared.  */
11550 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11551 vpaddlq_u16 (uint16x8_t a
)
11554 __asm__ ("uaddlp %0.4s,%1.8h"
11557 : /* No clobbers */);
/* vpaddlq_u32: uint32x4_t a -> uint64x2_t via one UADDLP instruction;
   no clobbers declared.  */
11561 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11562 vpaddlq_u32 (uint32x4_t a
)
11565 __asm__ ("uaddlp %0.2d,%1.4s"
11568 : /* No clobbers */);
/* vpaddq_f32: float32x4_t a, b -> float32x4_t via one FADDP instruction;
   no clobbers declared.  */
11572 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11573 vpaddq_f32 (float32x4_t a
, float32x4_t b
)
11575 float32x4_t result
;
11576 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
11579 : /* No clobbers */);
/* vpaddq_f64: float64x2_t a, b -> float64x2_t via one FADDP instruction;
   no clobbers declared.  */
11583 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11584 vpaddq_f64 (float64x2_t a
, float64x2_t b
)
11586 float64x2_t result
;
11587 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
11590 : /* No clobbers */);
/* vpaddq_s8: int8x16_t a, b -> int8x16_t via one ADDP instruction;
   no clobbers declared.  */
11594 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11595 vpaddq_s8 (int8x16_t a
, int8x16_t b
)
11598 __asm__ ("addp %0.16b,%1.16b,%2.16b"
11601 : /* No clobbers */);
/* vpaddq_s16: int16x8_t a, b -> int16x8_t via one ADDP instruction;
   no clobbers declared.  */
11605 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11606 vpaddq_s16 (int16x8_t a
, int16x8_t b
)
11609 __asm__ ("addp %0.8h,%1.8h,%2.8h"
11612 : /* No clobbers */);
/* vpaddq_s32: int32x4_t a, b -> int32x4_t via one ADDP instruction;
   no clobbers declared.  */
11616 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11617 vpaddq_s32 (int32x4_t a
, int32x4_t b
)
11620 __asm__ ("addp %0.4s,%1.4s,%2.4s"
11623 : /* No clobbers */);
/* vpaddq_s64: int64x2_t a, b -> int64x2_t via one ADDP instruction;
   no clobbers declared.  */
11627 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11628 vpaddq_s64 (int64x2_t a
, int64x2_t b
)
11631 __asm__ ("addp %0.2d,%1.2d,%2.2d"
11634 : /* No clobbers */);
/* vpaddq_u8: uint8x16_t a, b -> uint8x16_t via one ADDP instruction;
   no clobbers declared.  */
11638 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11639 vpaddq_u8 (uint8x16_t a
, uint8x16_t b
)
11642 __asm__ ("addp %0.16b,%1.16b,%2.16b"
11645 : /* No clobbers */);
/* vpaddq_u16: uint16x8_t a, b -> uint16x8_t via one ADDP instruction;
   no clobbers declared.  */
11649 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11650 vpaddq_u16 (uint16x8_t a
, uint16x8_t b
)
11653 __asm__ ("addp %0.8h,%1.8h,%2.8h"
11656 : /* No clobbers */);
/* vpaddq_u32: uint32x4_t a, b -> uint32x4_t via one ADDP instruction;
   no clobbers declared.  */
11660 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11661 vpaddq_u32 (uint32x4_t a
, uint32x4_t b
)
11664 __asm__ ("addp %0.4s,%1.4s,%2.4s"
11667 : /* No clobbers */);
/* vpaddq_u64: uint64x2_t a, b -> uint64x2_t via one ADDP instruction;
   no clobbers declared.  */
11671 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11672 vpaddq_u64 (uint64x2_t a
, uint64x2_t b
)
11675 __asm__ ("addp %0.2d,%1.2d,%2.2d"
11678 : /* No clobbers */);
/* vpadds_f32: float32x2_t a -> scalar float32_t via one FADDP writing an
   S register (%s0); no clobbers declared.  */
11682 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
11683 vpadds_f32 (float32x2_t a
)
11686 __asm__ ("faddp %s0,%1.2s"
11689 : /* No clobbers */);
/* vpmax_f32: float32x2_t a, b -> float32x2_t via one FMAXP instruction;
   no clobbers declared.  */
11693 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11694 vpmax_f32 (float32x2_t a
, float32x2_t b
)
11696 float32x2_t result
;
11697 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
11700 : /* No clobbers */);
/* vpmax_s8: int8x8_t a, b -> int8x8_t via one SMAXP instruction;
   no clobbers declared.  */
11704 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11705 vpmax_s8 (int8x8_t a
, int8x8_t b
)
11708 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
11711 : /* No clobbers */);
/* vpmax_s16: int16x4_t a, b -> int16x4_t via one SMAXP instruction;
   no clobbers declared.  */
11715 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11716 vpmax_s16 (int16x4_t a
, int16x4_t b
)
11719 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
11722 : /* No clobbers */);
/* vpmax_s32: int32x2_t a, b -> int32x2_t via one SMAXP instruction;
   no clobbers declared.  */
11726 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11727 vpmax_s32 (int32x2_t a
, int32x2_t b
)
11730 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
11733 : /* No clobbers */);
/* vpmax_u8: uint8x8_t a, b -> uint8x8_t via one UMAXP instruction;
   no clobbers declared.  */
11737 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11738 vpmax_u8 (uint8x8_t a
, uint8x8_t b
)
11741 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
11744 : /* No clobbers */);
/* vpmax_u16: uint16x4_t a, b -> uint16x4_t via one UMAXP instruction;
   no clobbers declared.  */
11748 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11749 vpmax_u16 (uint16x4_t a
, uint16x4_t b
)
11752 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
11755 : /* No clobbers */);
/* vpmax_u32: uint32x2_t a, b -> uint32x2_t via one UMAXP instruction;
   no clobbers declared.  */
11759 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11760 vpmax_u32 (uint32x2_t a
, uint32x2_t b
)
11763 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
11766 : /* No clobbers */);
/* vpmaxnm_f32: float32x2_t a, b -> float32x2_t via one FMAXNMP instruction;
   no clobbers declared.  */
11770 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11771 vpmaxnm_f32 (float32x2_t a
, float32x2_t b
)
11773 float32x2_t result
;
11774 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
11777 : /* No clobbers */);
/* vpmaxnmq_f32: float32x4_t a, b -> float32x4_t via one FMAXNMP instruction;
   no clobbers declared.  */
11781 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11782 vpmaxnmq_f32 (float32x4_t a
, float32x4_t b
)
11784 float32x4_t result
;
11785 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
11788 : /* No clobbers */);
/* vpmaxnmq_f64: float64x2_t a, b -> float64x2_t via one FMAXNMP instruction;
   no clobbers declared.  */
11792 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11793 vpmaxnmq_f64 (float64x2_t a
, float64x2_t b
)
11795 float64x2_t result
;
11796 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
11799 : /* No clobbers */);
/* vpmaxnmqd_f64: float64x2_t a -> scalar float64_t via one FMAXNMP writing
   a D register (%d0); no clobbers declared.  */
11803 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
11804 vpmaxnmqd_f64 (float64x2_t a
)
11807 __asm__ ("fmaxnmp %d0,%1.2d"
11810 : /* No clobbers */);
/* vpmaxnms_f32: float32x2_t a -> scalar float32_t via one FMAXNMP writing
   an S register (%s0); no clobbers declared.  */
11814 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
11815 vpmaxnms_f32 (float32x2_t a
)
11818 __asm__ ("fmaxnmp %s0,%1.2s"
11821 : /* No clobbers */);
/* vpmaxq_f32: float32x4_t a, b -> float32x4_t via one FMAXP instruction;
   no clobbers declared.  */
11825 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11826 vpmaxq_f32 (float32x4_t a
, float32x4_t b
)
11828 float32x4_t result
;
11829 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
11832 : /* No clobbers */);
/* vpmaxq_f64: float64x2_t a, b -> float64x2_t via one FMAXP instruction;
   no clobbers declared.  */
11836 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11837 vpmaxq_f64 (float64x2_t a
, float64x2_t b
)
11839 float64x2_t result
;
11840 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
11843 : /* No clobbers */);
/* vpmaxq_s8: int8x16_t a, b -> int8x16_t via one SMAXP instruction;
   no clobbers declared.  */
11847 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11848 vpmaxq_s8 (int8x16_t a
, int8x16_t b
)
11851 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
11854 : /* No clobbers */);
/* vpmaxq_s16: int16x8_t a, b -> int16x8_t via one SMAXP instruction;
   no clobbers declared.  */
11858 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11859 vpmaxq_s16 (int16x8_t a
, int16x8_t b
)
11862 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
11865 : /* No clobbers */);
/* vpmaxq_s32: int32x4_t a, b -> int32x4_t via one SMAXP instruction;
   no clobbers declared.  */
11869 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11870 vpmaxq_s32 (int32x4_t a
, int32x4_t b
)
11873 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
11876 : /* No clobbers */);
/* vpmaxq_u8: uint8x16_t a, b -> uint8x16_t via one UMAXP instruction;
   no clobbers declared.  */
11880 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11881 vpmaxq_u8 (uint8x16_t a
, uint8x16_t b
)
11884 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
11887 : /* No clobbers */);
/* vpmaxq_u16: uint16x8_t a, b -> uint16x8_t via one UMAXP instruction;
   no clobbers declared.  */
11891 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11892 vpmaxq_u16 (uint16x8_t a
, uint16x8_t b
)
11895 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
11898 : /* No clobbers */);
/* vpmaxq_u32: uint32x4_t a, b -> uint32x4_t via one UMAXP instruction;
   no clobbers declared.  */
11902 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11903 vpmaxq_u32 (uint32x4_t a
, uint32x4_t b
)
11906 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
11909 : /* No clobbers */);
/* vpmaxqd_f64: float64x2_t a -> scalar float64_t via one FMAXP writing a
   D register (%d0); no clobbers declared.  */
11913 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
11914 vpmaxqd_f64 (float64x2_t a
)
11917 __asm__ ("fmaxp %d0,%1.2d"
11920 : /* No clobbers */);
/* vpmaxs_f32: float32x2_t a -> scalar float32_t via one FMAXP writing an
   S register (%s0); no clobbers declared.  */
11924 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
11925 vpmaxs_f32 (float32x2_t a
)
11928 __asm__ ("fmaxp %s0,%1.2s"
11931 : /* No clobbers */);
/* vpmin_f32: float32x2_t a, b -> float32x2_t via one FMINP instruction;
   no clobbers declared.  */
11935 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11936 vpmin_f32 (float32x2_t a
, float32x2_t b
)
11938 float32x2_t result
;
11939 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
11942 : /* No clobbers */);
/* vpmin_s8: int8x8_t a, b -> int8x8_t via one SMINP instruction;
   no clobbers declared.  */
11946 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11947 vpmin_s8 (int8x8_t a
, int8x8_t b
)
11950 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
11953 : /* No clobbers */);
/* vpmin_s16: int16x4_t a, b -> int16x4_t via one SMINP instruction;
   no clobbers declared.  */
11957 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11958 vpmin_s16 (int16x4_t a
, int16x4_t b
)
11961 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
11964 : /* No clobbers */);
/* vpmin_s32: int32x2_t a, b -> int32x2_t via one SMINP instruction;
   no clobbers declared.  */
11968 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11969 vpmin_s32 (int32x2_t a
, int32x2_t b
)
11972 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
11975 : /* No clobbers */);
/* vpmin_u8: uint8x8_t a, b -> uint8x8_t via one UMINP instruction;
   no clobbers declared.  */
11979 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11980 vpmin_u8 (uint8x8_t a
, uint8x8_t b
)
11983 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
11986 : /* No clobbers */);
/* vpmin_u16: uint16x4_t a, b -> uint16x4_t via one UMINP instruction;
   no clobbers declared.  */
11990 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11991 vpmin_u16 (uint16x4_t a
, uint16x4_t b
)
11994 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
11997 : /* No clobbers */);
/* vpmin_u32: uint32x2_t a, b -> uint32x2_t via one UMINP instruction;
   no clobbers declared.  */
12001 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
12002 vpmin_u32 (uint32x2_t a
, uint32x2_t b
)
12005 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
12008 : /* No clobbers */);
/* vpminnm_f32: float32x2_t a, b -> float32x2_t via one FMINNMP instruction;
   no clobbers declared.  */
12012 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
12013 vpminnm_f32 (float32x2_t a
, float32x2_t b
)
12015 float32x2_t result
;
12016 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
12019 : /* No clobbers */);
/* vpminnmq_f32: float32x4_t a, b -> float32x4_t via one FMINNMP instruction;
   no clobbers declared.  */
12023 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
12024 vpminnmq_f32 (float32x4_t a
, float32x4_t b
)
12026 float32x4_t result
;
12027 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
12030 : /* No clobbers */);
/* vpminnmq_f64: float64x2_t a, b -> float64x2_t via one FMINNMP instruction;
   no clobbers declared.  */
12034 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
12035 vpminnmq_f64 (float64x2_t a
, float64x2_t b
)
12037 float64x2_t result
;
12038 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
12041 : /* No clobbers */);
/* vpminnmqd_f64: float64x2_t a -> scalar float64_t via one FMINNMP writing
   a D register (%d0); no clobbers declared.  */
12045 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
12046 vpminnmqd_f64 (float64x2_t a
)
12049 __asm__ ("fminnmp %d0,%1.2d"
12052 : /* No clobbers */);
/* vpminnms_f32: float32x2_t a -> scalar float32_t via one FMINNMP writing
   an S register (%s0); no clobbers declared.  */
12056 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
12057 vpminnms_f32 (float32x2_t a
)
12060 __asm__ ("fminnmp %s0,%1.2s"
12063 : /* No clobbers */);
/* vpminq_f32 (a, b): pairwise minimum on float32x4_t.  Template: FMINP
   (floating-point minimum pairwise) on the .4s arrangement, no clobbers.  */
12067 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
12068 vpminq_f32 (float32x4_t a
, float32x4_t b
)
12070 float32x4_t result
;
12071 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
12074 : /* No clobbers */);
/* vpminq_f64 (a, b): pairwise minimum on float64x2_t.  Template: FMINP
   (floating-point minimum pairwise) on the .2d arrangement, no clobbers.  */
12078 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
12079 vpminq_f64 (float64x2_t a
, float64x2_t b
)
12081 float64x2_t result
;
12082 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
12085 : /* No clobbers */);
/* vpminq_s8 (a, b): pairwise minimum on int8x16_t.  Template: SMINP
   (signed minimum pairwise) on the .16b arrangement, no clobbers.  */
12089 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12090 vpminq_s8 (int8x16_t a
, int8x16_t b
)
12093 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
12096 : /* No clobbers */);
/* vpminq_s16 (a, b): pairwise minimum on int16x8_t.  Template: SMINP
   (signed minimum pairwise) on the .8h arrangement, no clobbers.  */
12100 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12101 vpminq_s16 (int16x8_t a
, int16x8_t b
)
12104 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
12107 : /* No clobbers */);
/* vpminq_s32 (a, b): pairwise minimum on int32x4_t.  Template: SMINP
   (signed minimum pairwise) on the .4s arrangement, no clobbers.  */
12111 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12112 vpminq_s32 (int32x4_t a
, int32x4_t b
)
12115 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
12118 : /* No clobbers */);
/* vpminq_u8 (a, b): pairwise minimum on uint8x16_t.  Template: UMINP
   (unsigned minimum pairwise) on the .16b arrangement, no clobbers.  */
12122 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12123 vpminq_u8 (uint8x16_t a
, uint8x16_t b
)
12126 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
12129 : /* No clobbers */);
/* vpminq_u16 (a, b): pairwise minimum on uint16x8_t.  Template: UMINP
   (unsigned minimum pairwise) on the .8h arrangement, no clobbers.  */
12133 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12134 vpminq_u16 (uint16x8_t a
, uint16x8_t b
)
12137 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
12140 : /* No clobbers */);
/* vpminq_u32 (a, b): pairwise minimum on uint32x4_t.  Template: UMINP
   (unsigned minimum pairwise) on the .4s arrangement, no clobbers.  */
12144 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12145 vpminq_u32 (uint32x4_t a
, uint32x4_t b
)
12148 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
12151 : /* No clobbers */);
/* vpminqd_f64 (a): reduces a float64x2_t to a scalar float64_t with the
   scalar form of FMINP: %d0 is operand 0 printed as a D register, %1.2d
   the vector source.  No clobbers.  */
12155 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
12156 vpminqd_f64 (float64x2_t a
)
12159 __asm__ ("fminp %d0,%1.2d"
12162 : /* No clobbers */);
/* vpmins_f32 (a): reduces a float32x2_t to a scalar float32_t with the
   scalar form of FMINP: %s0 is operand 0 printed as an S register, %1.2s
   the vector source.  No clobbers.  */
12166 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
12167 vpmins_f32 (float32x2_t a
)
12170 __asm__ ("fminp %s0,%1.2s"
12173 : /* No clobbers */);
/* vqdmulh_n_s16 (a, b): int16x4_t vector a combined with scalar int16_t b.
   Template: SQDMULH (signed saturating doubling multiply returning high
   half), by-element form -- the second source is written %2.h[0], i.e.
   lane 0 of operand 2.  No clobbers.  */
12177 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12178 vqdmulh_n_s16 (int16x4_t a
, int16_t b
)
12181 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
12184 : /* No clobbers */);
/* vqdmulh_n_s32 (a, b): int32x2_t vector a combined with scalar int32_t b.
   Template: SQDMULH by-element on .2s, second source is lane 0 of
   operand 2 (%2.s[0]).  No clobbers.  */
12188 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
12189 vqdmulh_n_s32 (int32x2_t a
, int32_t b
)
12192 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
12195 : /* No clobbers */);
/* vqdmulhq_n_s16 (a, b): int16x8_t vector a combined with scalar int16_t b.
   Template: SQDMULH by-element on .8h, second source is lane 0 of
   operand 2 (%2.h[0]).  No clobbers.  */
12199 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12200 vqdmulhq_n_s16 (int16x8_t a
, int16_t b
)
12203 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
12206 : /* No clobbers */);
/* vqdmulhq_n_s32 (a, b): int32x4_t vector a combined with scalar int32_t b.
   Template: SQDMULH by-element on .4s, second source is lane 0 of
   operand 2 (%2.s[0]).  No clobbers.  */
12210 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12211 vqdmulhq_n_s32 (int32x4_t a
, int32_t b
)
12214 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
12217 : /* No clobbers */);
/* vqmovn_high_s16 (a, b): builds an int8x16_t from int8x8_t a and
   int16x8_t b.  `result' is first set to vcombine_s8 (a, all-zero vector);
   the SQXTN2 (signed saturating extract narrow, second part) template then
   uses %0.16b as destination and %1.8h as source.  No clobbers.  */
12221 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12222 vqmovn_high_s16 (int8x8_t a
, int16x8_t b
)
12224 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
12225 __asm__ ("sqxtn2 %0.16b, %1.8h"
12228 : /* No clobbers */);
/* vqmovn_high_s32 (a, b): builds an int16x8_t from int16x4_t a and
   int32x4_t b.  `result' starts as vcombine_s16 (a, all-zero vector); the
   SQXTN2 template uses %0.8h as destination and %1.4s as source.
   No clobbers.  */
12232 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12233 vqmovn_high_s32 (int16x4_t a
, int32x4_t b
)
12235 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
12236 __asm__ ("sqxtn2 %0.8h, %1.4s"
12239 : /* No clobbers */);
/* vqmovn_high_s64 (a, b): builds an int32x4_t from int32x2_t a and
   int64x2_t b.  `result' starts as vcombine_s32 (a, all-zero vector); the
   SQXTN2 template uses %0.4s as destination and %1.2d as source.
   No clobbers.  */
12243 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12244 vqmovn_high_s64 (int32x2_t a
, int64x2_t b
)
12246 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
12247 __asm__ ("sqxtn2 %0.4s, %1.2d"
12250 : /* No clobbers */);
/* vqmovn_high_u16 (a, b): builds a uint8x16_t from uint8x8_t a and
   uint16x8_t b.  `result' starts as vcombine_u8 (a, all-zero vector); the
   UQXTN2 (unsigned saturating extract narrow, second part) template uses
   %0.16b as destination and %1.8h as source.  No clobbers.  */
12254 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12255 vqmovn_high_u16 (uint8x8_t a
, uint16x8_t b
)
12257 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
12258 __asm__ ("uqxtn2 %0.16b, %1.8h"
12261 : /* No clobbers */);
/* vqmovn_high_u32 (a, b): builds a uint16x8_t from uint16x4_t a and
   uint32x4_t b.  `result' starts as vcombine_u16 (a, all-zero vector); the
   UQXTN2 template uses %0.8h as destination and %1.4s as source.
   No clobbers.  */
12265 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12266 vqmovn_high_u32 (uint16x4_t a
, uint32x4_t b
)
12268 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
12269 __asm__ ("uqxtn2 %0.8h, %1.4s"
12272 : /* No clobbers */);
/* vqmovn_high_u64 (a, b): builds a uint32x4_t from uint32x2_t a and
   uint64x2_t b.  `result' starts as vcombine_u32 (a, all-zero vector); the
   UQXTN2 template uses %0.4s as destination and %1.2d as source.
   No clobbers.  */
12276 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12277 vqmovn_high_u64 (uint32x2_t a
, uint64x2_t b
)
12279 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
12280 __asm__ ("uqxtn2 %0.4s, %1.2d"
12283 : /* No clobbers */);
/* vqmovun_high_s16 (a, b): builds a uint8x16_t from uint8x8_t a and signed
   int16x8_t b.  `result' starts as vcombine_u8 (a, all-zero vector); the
   SQXTUN2 (signed saturating extract unsigned narrow, second part)
   template uses %0.16b as destination and %1.8h as source.  No clobbers.  */
12287 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12288 vqmovun_high_s16 (uint8x8_t a
, int16x8_t b
)
12290 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
12291 __asm__ ("sqxtun2 %0.16b, %1.8h"
12294 : /* No clobbers */);
/* vqmovun_high_s32 (a, b): builds a uint16x8_t from uint16x4_t a and signed
   int32x4_t b.  `result' starts as vcombine_u16 (a, all-zero vector); the
   SQXTUN2 template uses %0.8h as destination and %1.4s as source.
   No clobbers.  */
12298 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12299 vqmovun_high_s32 (uint16x4_t a
, int32x4_t b
)
12301 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
12302 __asm__ ("sqxtun2 %0.8h, %1.4s"
12305 : /* No clobbers */);
/* vqmovun_high_s64 (a, b): builds a uint32x4_t from uint32x2_t a and signed
   int64x2_t b.  `result' starts as vcombine_u32 (a, all-zero vector); the
   SQXTUN2 template uses %0.4s as destination and %1.2d as source.
   No clobbers.  */
12309 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12310 vqmovun_high_s64 (uint32x2_t a
, int64x2_t b
)
12312 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
12313 __asm__ ("sqxtun2 %0.4s, %1.2d"
12316 : /* No clobbers */);
/* vqrdmulh_n_s16 (a, b): int16x4_t vector a combined with scalar int16_t b.
   Template: SQRDMULH (signed saturating rounding doubling multiply
   returning high half), by-element form with lane 0 of operand 2
   (%2.h[0]).  No clobbers.  */
12320 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12321 vqrdmulh_n_s16 (int16x4_t a
, int16_t b
)
12324 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
12327 : /* No clobbers */);
/* vqrdmulh_n_s32 (a, b): int32x2_t vector a combined with scalar int32_t b.
   Template: SQRDMULH by-element on .2s with lane 0 of operand 2
   (%2.s[0]).  No clobbers.  */
12331 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
12332 vqrdmulh_n_s32 (int32x2_t a
, int32_t b
)
12335 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
12338 : /* No clobbers */);
/* vqrdmulhq_n_s16 (a, b): int16x8_t vector a combined with scalar
   int16_t b.  Template: SQRDMULH by-element on .8h with lane 0 of
   operand 2 (%2.h[0]).  No clobbers.  */
12342 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12343 vqrdmulhq_n_s16 (int16x8_t a
, int16_t b
)
12346 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
12349 : /* No clobbers */);
/* vqrdmulhq_n_s32 (a, b): int32x4_t vector a combined with scalar
   int32_t b.  Template: SQRDMULH by-element on .4s with lane 0 of
   operand 2 (%2.s[0]).  No clobbers.  */
12353 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12354 vqrdmulhq_n_s32 (int32x4_t a
, int32_t b
)
12357 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
12360 : /* No clobbers */);
/* vqrshrn_high_n_s16 (a, b, c): macro rather than a function so that the
   shift count c can be given to the asm through the "i" (immediate)
   constraint.  a_/b_ are typed copies of the arguments; result starts as
   vcombine_s8 (a_, all-zero vector).  Template: SQRSHRN2 (signed saturating
   rounded shift right narrow, second part), %1.8h -> %0.16b, shift #%2.  */
12364 #define vqrshrn_high_n_s16(a, b, c) \
12367 int16x8_t b_ = (b); \
12368 int8x8_t a_ = (a); \
12369 int8x16_t result = vcombine_s8 \
12370 (a_, vcreate_s8 (UINT64_C (0x0))); \
12371 __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
12373 : "w"(b_), "i"(c) \
12374 : /* No clobbers */); \
/* vqrshrn_high_n_s32 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s16 (a_, all-zero vector).
   Template: SQRSHRN2, %1.4s -> %0.8h, shift #%2.  */
12378 #define vqrshrn_high_n_s32(a, b, c) \
12381 int32x4_t b_ = (b); \
12382 int16x4_t a_ = (a); \
12383 int16x8_t result = vcombine_s16 \
12384 (a_, vcreate_s16 (UINT64_C (0x0))); \
12385 __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
12387 : "w"(b_), "i"(c) \
12388 : /* No clobbers */); \
/* vqrshrn_high_n_s64 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s32 (a_, all-zero vector).
   Template: SQRSHRN2, %1.2d -> %0.4s, shift #%2.  */
12392 #define vqrshrn_high_n_s64(a, b, c) \
12395 int64x2_t b_ = (b); \
12396 int32x2_t a_ = (a); \
12397 int32x4_t result = vcombine_s32 \
12398 (a_, vcreate_s32 (UINT64_C (0x0))); \
12399 __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
12401 : "w"(b_), "i"(c) \
12402 : /* No clobbers */); \
/* vqrshrn_high_n_u16 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u8 (a_, all-zero vector).
   Template: UQRSHRN2 (unsigned saturating rounded shift right narrow,
   second part), %1.8h -> %0.16b, shift #%2.  */
12406 #define vqrshrn_high_n_u16(a, b, c) \
12409 uint16x8_t b_ = (b); \
12410 uint8x8_t a_ = (a); \
12411 uint8x16_t result = vcombine_u8 \
12412 (a_, vcreate_u8 (UINT64_C (0x0))); \
12413 __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
12415 : "w"(b_), "i"(c) \
12416 : /* No clobbers */); \
/* vqrshrn_high_n_u32 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u16 (a_, all-zero vector).
   Template: UQRSHRN2, %1.4s -> %0.8h, shift #%2.  */
12420 #define vqrshrn_high_n_u32(a, b, c) \
12423 uint32x4_t b_ = (b); \
12424 uint16x4_t a_ = (a); \
12425 uint16x8_t result = vcombine_u16 \
12426 (a_, vcreate_u16 (UINT64_C (0x0))); \
12427 __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
12429 : "w"(b_), "i"(c) \
12430 : /* No clobbers */); \
/* vqrshrn_high_n_u64 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u32 (a_, all-zero vector).
   Template: UQRSHRN2, %1.2d -> %0.4s, shift #%2.  */
12434 #define vqrshrn_high_n_u64(a, b, c) \
12437 uint64x2_t b_ = (b); \
12438 uint32x2_t a_ = (a); \
12439 uint32x4_t result = vcombine_u32 \
12440 (a_, vcreate_u32 (UINT64_C (0x0))); \
12441 __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
12443 : "w"(b_), "i"(c) \
12444 : /* No clobbers */); \
/* vqrshrun_high_n_s16 (a, b, c): signed int16x8_t source b, unsigned
   uint8x8_t a.  Macro so that c reaches the asm as an "i" immediate.
   result starts as vcombine_u8 (a_, all-zero vector).  Template: SQRSHRUN2
   (signed saturating rounded shift right unsigned narrow, second part),
   %1.8h -> %0.16b, shift #%2.  */
12448 #define vqrshrun_high_n_s16(a, b, c) \
12451 int16x8_t b_ = (b); \
12452 uint8x8_t a_ = (a); \
12453 uint8x16_t result = vcombine_u8 \
12454 (a_, vcreate_u8 (UINT64_C (0x0))); \
12455 __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
12457 : "w"(b_), "i"(c) \
12458 : /* No clobbers */); \
/* vqrshrun_high_n_s32 (a, b, c): signed int32x4_t source b, unsigned
   uint16x4_t a.  result starts as vcombine_u16 (a_, all-zero vector).
   Template: SQRSHRUN2, %1.4s -> %0.8h, shift #%2 ("i" immediate c).  */
12462 #define vqrshrun_high_n_s32(a, b, c) \
12465 int32x4_t b_ = (b); \
12466 uint16x4_t a_ = (a); \
12467 uint16x8_t result = vcombine_u16 \
12468 (a_, vcreate_u16 (UINT64_C (0x0))); \
12469 __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
12471 : "w"(b_), "i"(c) \
12472 : /* No clobbers */); \
/* vqrshrun_high_n_s64 (a, b, c): signed int64x2_t source b, unsigned
   uint32x2_t a.  result starts as vcombine_u32 (a_, all-zero vector).
   Template: SQRSHRUN2, %1.2d -> %0.4s, shift #%2 ("i" immediate c).  */
12476 #define vqrshrun_high_n_s64(a, b, c) \
12479 int64x2_t b_ = (b); \
12480 uint32x2_t a_ = (a); \
12481 uint32x4_t result = vcombine_u32 \
12482 (a_, vcreate_u32 (UINT64_C (0x0))); \
12483 __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
12485 : "w"(b_), "i"(c) \
12486 : /* No clobbers */); \
/* vqshrn_high_n_s16 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s8 (a_, all-zero vector).
   Template: SQSHRN2 (signed saturating shift right narrow, second part),
   %1.8h -> %0.16b, shift #%2.  */
12490 #define vqshrn_high_n_s16(a, b, c) \
12493 int16x8_t b_ = (b); \
12494 int8x8_t a_ = (a); \
12495 int8x16_t result = vcombine_s8 \
12496 (a_, vcreate_s8 (UINT64_C (0x0))); \
12497 __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
12499 : "w"(b_), "i"(c) \
12500 : /* No clobbers */); \
/* vqshrn_high_n_s32 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s16 (a_, all-zero vector).
   Template: SQSHRN2, %1.4s -> %0.8h, shift #%2.  */
12504 #define vqshrn_high_n_s32(a, b, c) \
12507 int32x4_t b_ = (b); \
12508 int16x4_t a_ = (a); \
12509 int16x8_t result = vcombine_s16 \
12510 (a_, vcreate_s16 (UINT64_C (0x0))); \
12511 __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
12513 : "w"(b_), "i"(c) \
12514 : /* No clobbers */); \
/* vqshrn_high_n_s64 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s32 (a_, all-zero vector).
   Template: SQSHRN2, %1.2d -> %0.4s, shift #%2.  */
12518 #define vqshrn_high_n_s64(a, b, c) \
12521 int64x2_t b_ = (b); \
12522 int32x2_t a_ = (a); \
12523 int32x4_t result = vcombine_s32 \
12524 (a_, vcreate_s32 (UINT64_C (0x0))); \
12525 __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
12527 : "w"(b_), "i"(c) \
12528 : /* No clobbers */); \
/* vqshrn_high_n_u16 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u8 (a_, all-zero vector).
   Template: UQSHRN2 (unsigned saturating shift right narrow, second part),
   %1.8h -> %0.16b, shift #%2.  */
12532 #define vqshrn_high_n_u16(a, b, c) \
12535 uint16x8_t b_ = (b); \
12536 uint8x8_t a_ = (a); \
12537 uint8x16_t result = vcombine_u8 \
12538 (a_, vcreate_u8 (UINT64_C (0x0))); \
12539 __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
12541 : "w"(b_), "i"(c) \
12542 : /* No clobbers */); \
/* vqshrn_high_n_u32 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u16 (a_, all-zero vector).
   Template: UQSHRN2, %1.4s -> %0.8h, shift #%2.  */
12546 #define vqshrn_high_n_u32(a, b, c) \
12549 uint32x4_t b_ = (b); \
12550 uint16x4_t a_ = (a); \
12551 uint16x8_t result = vcombine_u16 \
12552 (a_, vcreate_u16 (UINT64_C (0x0))); \
12553 __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
12555 : "w"(b_), "i"(c) \
12556 : /* No clobbers */); \
/* vqshrn_high_n_u64 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_u32 (a_, all-zero vector).
   Template: UQSHRN2, %1.2d -> %0.4s, shift #%2.  */
12560 #define vqshrn_high_n_u64(a, b, c) \
12563 uint64x2_t b_ = (b); \
12564 uint32x2_t a_ = (a); \
12565 uint32x4_t result = vcombine_u32 \
12566 (a_, vcreate_u32 (UINT64_C (0x0))); \
12567 __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
12569 : "w"(b_), "i"(c) \
12570 : /* No clobbers */); \
/* vqshrun_high_n_s16 (a, b, c): signed int16x8_t source b, unsigned
   uint8x8_t a.  result starts as vcombine_u8 (a_, all-zero vector).
   Template: SQSHRUN2 (signed saturating shift right unsigned narrow,
   second part), %1.8h -> %0.16b, shift #%2 ("i" immediate c).  */
12574 #define vqshrun_high_n_s16(a, b, c) \
12577 int16x8_t b_ = (b); \
12578 uint8x8_t a_ = (a); \
12579 uint8x16_t result = vcombine_u8 \
12580 (a_, vcreate_u8 (UINT64_C (0x0))); \
12581 __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
12583 : "w"(b_), "i"(c) \
12584 : /* No clobbers */); \
/* vqshrun_high_n_s32 (a, b, c): signed int32x4_t source b, unsigned
   uint16x4_t a.  result starts as vcombine_u16 (a_, all-zero vector).
   Template: SQSHRUN2, %1.4s -> %0.8h, shift #%2 ("i" immediate c).  */
12588 #define vqshrun_high_n_s32(a, b, c) \
12591 int32x4_t b_ = (b); \
12592 uint16x4_t a_ = (a); \
12593 uint16x8_t result = vcombine_u16 \
12594 (a_, vcreate_u16 (UINT64_C (0x0))); \
12595 __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
12597 : "w"(b_), "i"(c) \
12598 : /* No clobbers */); \
/* vqshrun_high_n_s64 (a, b, c): signed int64x2_t source b, unsigned
   uint32x2_t a.  result starts as vcombine_u32 (a_, all-zero vector).
   Template: SQSHRUN2, %1.2d -> %0.4s, shift #%2 ("i" immediate c).  */
12602 #define vqshrun_high_n_s64(a, b, c) \
12605 int64x2_t b_ = (b); \
12606 uint32x2_t a_ = (a); \
12607 uint32x4_t result = vcombine_u32 \
12608 (a_, vcreate_u32 (UINT64_C (0x0))); \
12609 __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
12611 : "w"(b_), "i"(c) \
12612 : /* No clobbers */); \
/* vrbit_s8 (a): int8x8_t -> int8x8_t.  Template: vector RBIT (reverse the
   bit order within each byte) on the .8b arrangement.  No clobbers.  */
12616 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
12617 vrbit_s8 (int8x8_t a
)
12620 __asm__ ("rbit %0.8b,%1.8b"
12623 : /* No clobbers */);
/* vrbit_u8 (a): uint8x8_t -> uint8x8_t.  Same RBIT .8b template as the
   signed variant; only the C types differ.  No clobbers.  */
12627 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
12628 vrbit_u8 (uint8x8_t a
)
12631 __asm__ ("rbit %0.8b,%1.8b"
12634 : /* No clobbers */);
/* vrbitq_s8 (a): int8x16_t -> int8x16_t.  Template: vector RBIT (reverse
   the bit order within each byte) on the .16b arrangement.  No clobbers.  */
12638 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12639 vrbitq_s8 (int8x16_t a
)
12642 __asm__ ("rbit %0.16b,%1.16b"
12645 : /* No clobbers */);
/* vrbitq_u8 (a): uint8x16_t -> uint8x16_t.  Same RBIT .16b template as the
   signed variant; only the C types differ.  No clobbers.  */
12649 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12650 vrbitq_u8 (uint8x16_t a
)
12653 __asm__ ("rbit %0.16b,%1.16b"
12656 : /* No clobbers */);
/* vrecpe_u32 (a): uint32x2_t -> uint32x2_t.  Template: URECPE (unsigned
   reciprocal estimate) on the .2s arrangement.  No clobbers.  */
12660 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
12661 vrecpe_u32 (uint32x2_t a
)
12664 __asm__ ("urecpe %0.2s,%1.2s"
12667 : /* No clobbers */);
/* vrecpeq_u32 (a): uint32x4_t -> uint32x4_t.  Template: URECPE (unsigned
   reciprocal estimate) on the .4s arrangement.  No clobbers.  */
12671 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12672 vrecpeq_u32 (uint32x4_t a
)
12675 __asm__ ("urecpe %0.4s,%1.4s"
12678 : /* No clobbers */);
/* vrev16_p8 (a): poly8x8_t -> poly8x8_t.  Template: REV16 on .8b, which
   reverses the bytes within each 16-bit halfword.  No clobbers.  */
12682 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
12683 vrev16_p8 (poly8x8_t a
)
12686 __asm__ ("rev16 %0.8b,%1.8b"
12689 : /* No clobbers */);
/* vrev16_s8 (a): int8x8_t -> int8x8_t.  Template: REV16 on .8b (reverse
   the bytes within each 16-bit halfword).  No clobbers.  */
12693 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
12694 vrev16_s8 (int8x8_t a
)
12697 __asm__ ("rev16 %0.8b,%1.8b"
12700 : /* No clobbers */);
/* vrev16_u8 (a): uint8x8_t -> uint8x8_t.  Template: REV16 on .8b (reverse
   the bytes within each 16-bit halfword).  No clobbers.  */
12704 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
12705 vrev16_u8 (uint8x8_t a
)
12708 __asm__ ("rev16 %0.8b,%1.8b"
12711 : /* No clobbers */);
/* vrev16q_p8 (a): poly8x16_t -> poly8x16_t.  Template: REV16 on .16b
   (reverse the bytes within each 16-bit halfword).  No clobbers.  */
12715 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
12716 vrev16q_p8 (poly8x16_t a
)
12719 __asm__ ("rev16 %0.16b,%1.16b"
12722 : /* No clobbers */);
/* vrev16q_s8 (a): int8x16_t -> int8x16_t.  Template: REV16 on .16b
   (reverse the bytes within each 16-bit halfword).  No clobbers.  */
12726 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12727 vrev16q_s8 (int8x16_t a
)
12730 __asm__ ("rev16 %0.16b,%1.16b"
12733 : /* No clobbers */);
/* vrev16q_u8 (a): uint8x16_t -> uint8x16_t.  Template: REV16 on .16b
   (reverse the bytes within each 16-bit halfword).  No clobbers.  */
12737 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12738 vrev16q_u8 (uint8x16_t a
)
12741 __asm__ ("rev16 %0.16b,%1.16b"
12744 : /* No clobbers */);
/* vrev32_p8 (a): poly8x8_t -> poly8x8_t.  Template: REV32 on .8b, which
   reverses the bytes within each 32-bit word.  No clobbers.  */
12748 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
12749 vrev32_p8 (poly8x8_t a
)
12752 __asm__ ("rev32 %0.8b,%1.8b"
12755 : /* No clobbers */);
/* vrev32_p16 (a): poly16x4_t -> poly16x4_t.  Template: REV32 on .4h, which
   reverses the halfwords within each 32-bit word.  No clobbers.  */
12759 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
12760 vrev32_p16 (poly16x4_t a
)
12763 __asm__ ("rev32 %0.4h,%1.4h"
12766 : /* No clobbers */);
/* vrev32_s8 (a): int8x8_t -> int8x8_t.  Template: REV32 on .8b (reverse
   the bytes within each 32-bit word).  No clobbers.  */
12770 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
12771 vrev32_s8 (int8x8_t a
)
12774 __asm__ ("rev32 %0.8b,%1.8b"
12777 : /* No clobbers */);
/* vrev32_s16 (a): int16x4_t -> int16x4_t.  Template: REV32 on .4h (reverse
   the halfwords within each 32-bit word).  No clobbers.  */
12781 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12782 vrev32_s16 (int16x4_t a
)
12785 __asm__ ("rev32 %0.4h,%1.4h"
12788 : /* No clobbers */);
/* vrev32_u8 (a): uint8x8_t -> uint8x8_t.  Template: REV32 on .8b (reverse
   the bytes within each 32-bit word).  No clobbers.  */
12792 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
12793 vrev32_u8 (uint8x8_t a
)
12796 __asm__ ("rev32 %0.8b,%1.8b"
12799 : /* No clobbers */);
/* vrev32_u16 (a): uint16x4_t -> uint16x4_t.  Template: REV32 on .4h
   (reverse the halfwords within each 32-bit word).  No clobbers.  */
12803 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
12804 vrev32_u16 (uint16x4_t a
)
12807 __asm__ ("rev32 %0.4h,%1.4h"
12810 : /* No clobbers */);
/* vrev32q_p8 (a): poly8x16_t -> poly8x16_t.  Template: REV32 on .16b
   (reverse the bytes within each 32-bit word).  No clobbers.  */
12814 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
12815 vrev32q_p8 (poly8x16_t a
)
12818 __asm__ ("rev32 %0.16b,%1.16b"
12821 : /* No clobbers */);
/* vrev32q_p16 (a): poly16x8_t -> poly16x8_t.  Template: REV32 on .8h
   (reverse the halfwords within each 32-bit word).  No clobbers.  */
12825 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
12826 vrev32q_p16 (poly16x8_t a
)
12829 __asm__ ("rev32 %0.8h,%1.8h"
12832 : /* No clobbers */);
/* vrev32q_s8 (a): int8x16_t -> int8x16_t.  Template: REV32 on .16b
   (reverse the bytes within each 32-bit word).  No clobbers.  */
12836 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12837 vrev32q_s8 (int8x16_t a
)
12840 __asm__ ("rev32 %0.16b,%1.16b"
12843 : /* No clobbers */);
/* vrev32q_s16 (a): int16x8_t -> int16x8_t.  Template: REV32 on .8h
   (reverse the halfwords within each 32-bit word).  No clobbers.  */
12847 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12848 vrev32q_s16 (int16x8_t a
)
12851 __asm__ ("rev32 %0.8h,%1.8h"
12854 : /* No clobbers */);
/* vrev32q_u8 (a): uint8x16_t -> uint8x16_t.  Template: REV32 on .16b
   (reverse the bytes within each 32-bit word).  No clobbers.  */
12858 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12859 vrev32q_u8 (uint8x16_t a
)
12862 __asm__ ("rev32 %0.16b,%1.16b"
12865 : /* No clobbers */);
/* vrev32q_u16 (a): uint16x8_t -> uint16x8_t.  Template: REV32 on .8h
   (reverse the halfwords within each 32-bit word).  No clobbers.  */
12869 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12870 vrev32q_u16 (uint16x8_t a
)
12873 __asm__ ("rev32 %0.8h,%1.8h"
12876 : /* No clobbers */);
/* vrev64_f32 (a): float32x2_t -> float32x2_t.  Template: REV64 on .2s,
   which reverses the 32-bit elements within each 64-bit doubleword.
   No clobbers.  */
12880 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
12881 vrev64_f32 (float32x2_t a
)
12883 float32x2_t result
;
12884 __asm__ ("rev64 %0.2s,%1.2s"
12887 : /* No clobbers */);
/* vrev64_p8 (a): poly8x8_t -> poly8x8_t.  Template: REV64 on .8b (reverse
   the bytes within each 64-bit doubleword).  No clobbers.  */
12891 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
12892 vrev64_p8 (poly8x8_t a
)
12895 __asm__ ("rev64 %0.8b,%1.8b"
12898 : /* No clobbers */);
/* vrev64_p16 (a): poly16x4_t -> poly16x4_t.  Template: REV64 on .4h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
12902 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
12903 vrev64_p16 (poly16x4_t a
)
12906 __asm__ ("rev64 %0.4h,%1.4h"
12909 : /* No clobbers */);
/* vrev64_s8 (a): int8x8_t -> int8x8_t.  Template: REV64 on .8b (reverse
   the bytes within each 64-bit doubleword).  No clobbers.  */
12913 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
12914 vrev64_s8 (int8x8_t a
)
12917 __asm__ ("rev64 %0.8b,%1.8b"
12920 : /* No clobbers */);
/* vrev64_s16 (a): int16x4_t -> int16x4_t.  Template: REV64 on .4h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
12924 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12925 vrev64_s16 (int16x4_t a
)
12928 __asm__ ("rev64 %0.4h,%1.4h"
12931 : /* No clobbers */);
/* vrev64_s32 (a): int32x2_t -> int32x2_t.  Template: REV64 on .2s
   (reverse the words within each 64-bit doubleword).  No clobbers.  */
12935 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
12936 vrev64_s32 (int32x2_t a
)
12939 __asm__ ("rev64 %0.2s,%1.2s"
12942 : /* No clobbers */);
/* vrev64_u8 (a): uint8x8_t -> uint8x8_t.  Template: REV64 on .8b (reverse
   the bytes within each 64-bit doubleword).  No clobbers.  */
12946 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
12947 vrev64_u8 (uint8x8_t a
)
12950 __asm__ ("rev64 %0.8b,%1.8b"
12953 : /* No clobbers */);
/* vrev64_u16 (a): uint16x4_t -> uint16x4_t.  Template: REV64 on .4h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
12957 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
12958 vrev64_u16 (uint16x4_t a
)
12961 __asm__ ("rev64 %0.4h,%1.4h"
12964 : /* No clobbers */);
/* vrev64_u32 (a): uint32x2_t -> uint32x2_t.  Template: REV64 on .2s
   (reverse the words within each 64-bit doubleword).  No clobbers.  */
12968 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
12969 vrev64_u32 (uint32x2_t a
)
12972 __asm__ ("rev64 %0.2s,%1.2s"
12975 : /* No clobbers */);
/* vrev64q_f32 (a): float32x4_t -> float32x4_t.  Template: REV64 on .4s
   (reverse the 32-bit elements within each 64-bit doubleword).
   No clobbers.  */
12979 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
12980 vrev64q_f32 (float32x4_t a
)
12982 float32x4_t result
;
12983 __asm__ ("rev64 %0.4s,%1.4s"
12986 : /* No clobbers */);
/* vrev64q_p8 (a): poly8x16_t -> poly8x16_t.  Template: REV64 on .16b
   (reverse the bytes within each 64-bit doubleword).  No clobbers.  */
12990 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
12991 vrev64q_p8 (poly8x16_t a
)
12994 __asm__ ("rev64 %0.16b,%1.16b"
12997 : /* No clobbers */);
/* vrev64q_p16 (a): poly16x8_t -> poly16x8_t.  Template: REV64 on .8h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
13001 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
13002 vrev64q_p16 (poly16x8_t a
)
13005 __asm__ ("rev64 %0.8h,%1.8h"
13008 : /* No clobbers */);
/* vrev64q_s8 (a): int8x16_t -> int8x16_t.  Template: REV64 on .16b
   (reverse the bytes within each 64-bit doubleword).  No clobbers.  */
13012 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13013 vrev64q_s8 (int8x16_t a
)
13016 __asm__ ("rev64 %0.16b,%1.16b"
13019 : /* No clobbers */);
/* vrev64q_s16 (a): int16x8_t -> int16x8_t.  Template: REV64 on .8h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
13023 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13024 vrev64q_s16 (int16x8_t a
)
13027 __asm__ ("rev64 %0.8h,%1.8h"
13030 : /* No clobbers */);
/* vrev64q_s32 (a): int32x4_t -> int32x4_t.  Template: REV64 on .4s
   (reverse the words within each 64-bit doubleword).  No clobbers.  */
13034 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13035 vrev64q_s32 (int32x4_t a
)
13038 __asm__ ("rev64 %0.4s,%1.4s"
13041 : /* No clobbers */);
/* vrev64q_u8 (a): uint8x16_t -> uint8x16_t.  Template: REV64 on .16b
   (reverse the bytes within each 64-bit doubleword).  No clobbers.  */
13045 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
13046 vrev64q_u8 (uint8x16_t a
)
13049 __asm__ ("rev64 %0.16b,%1.16b"
13052 : /* No clobbers */);
/* vrev64q_u16 (a): uint16x8_t -> uint16x8_t.  Template: REV64 on .8h
   (reverse the halfwords within each 64-bit doubleword).  No clobbers.  */
13056 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13057 vrev64q_u16 (uint16x8_t a
)
13060 __asm__ ("rev64 %0.8h,%1.8h"
13063 : /* No clobbers */);
/* vrev64q_u32 (a): uint32x4_t -> uint32x4_t.  Template: REV64 on .4s
   (reverse the words within each 64-bit doubleword).  No clobbers.  */
13067 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13068 vrev64q_u32 (uint32x4_t a
)
13071 __asm__ ("rev64 %0.4s,%1.4s"
13074 : /* No clobbers */);
/* vrshrn_high_n_s16 (a, b, c): macro so that the shift count c reaches the
   asm through the "i" (immediate) constraint.  result starts as
   vcombine_s8 (a_, all-zero vector).  Template: RSHRN2 (rounding shift
   right narrow, second part), %1.8h -> %0.16b, shift #%2.  */
13078 #define vrshrn_high_n_s16(a, b, c) \
13081 int16x8_t b_ = (b); \
13082 int8x8_t a_ = (a); \
13083 int8x16_t result = vcombine_s8 \
13084 (a_, vcreate_s8 (UINT64_C (0x0))); \
13085 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
13087 : "w"(b_), "i"(c) \
13088 : /* No clobbers */); \
/* vrshrn_high_n_s32 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s16 (a_, all-zero vector).
   Template: RSHRN2, %1.4s -> %0.8h, shift #%2.  */
13092 #define vrshrn_high_n_s32(a, b, c) \
13095 int32x4_t b_ = (b); \
13096 int16x4_t a_ = (a); \
13097 int16x8_t result = vcombine_s16 \
13098 (a_, vcreate_s16 (UINT64_C (0x0))); \
13099 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
13101 : "w"(b_), "i"(c) \
13102 : /* No clobbers */); \
/* vrshrn_high_n_s64 (a, b, c): macro so that c reaches the asm as an "i"
   immediate.  result starts as vcombine_s32 (a_, all-zero vector).
   Template: RSHRN2, %1.2d -> %0.4s, shift #%2.  */
13106 #define vrshrn_high_n_s64(a, b, c) \
13109 int64x2_t b_ = (b); \
13110 int32x2_t a_ = (a); \
13111 int32x4_t result = vcombine_s32 \
13112 (a_, vcreate_s32 (UINT64_C (0x0))); \
13113 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
13115 : "w"(b_), "i"(c) \
13116 : /* No clobbers */); \
/* vrshrn_high_n_u16 (a, b, c): unsigned-typed twin of the s16 form; the
   RSHRN2 template (%1.8h -> %0.16b, shift #%2) is the same, only the C
   types differ.  result starts as vcombine_u8 (a_, all-zero vector).  */
13120 #define vrshrn_high_n_u16(a, b, c) \
13123 uint16x8_t b_ = (b); \
13124 uint8x8_t a_ = (a); \
13125 uint8x16_t result = vcombine_u8 \
13126 (a_, vcreate_u8 (UINT64_C (0x0))); \
13127 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
13129 : "w"(b_), "i"(c) \
13130 : /* No clobbers */); \
/* vrshrn_high_n_u32 (a, b, c): unsigned-typed twin of the s32 form; the
   RSHRN2 template (%1.4s -> %0.8h, shift #%2) is the same, only the C
   types differ.  result starts as vcombine_u16 (a_, all-zero vector).  */
13134 #define vrshrn_high_n_u32(a, b, c) \
13137 uint32x4_t b_ = (b); \
13138 uint16x4_t a_ = (a); \
13139 uint16x8_t result = vcombine_u16 \
13140 (a_, vcreate_u16 (UINT64_C (0x0))); \
13141 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
13143 : "w"(b_), "i"(c) \
13144 : /* No clobbers */); \
/* vrshrn_high_n_u64 (a, b, c): unsigned-typed twin of the s64 form; the
   RSHRN2 template (%1.2d -> %0.4s, shift #%2) is the same, only the C
   types differ.  result starts as vcombine_u32 (a_, all-zero vector).  */
13148 #define vrshrn_high_n_u64(a, b, c) \
13151 uint64x2_t b_ = (b); \
13152 uint32x2_t a_ = (a); \
13153 uint32x4_t result = vcombine_u32 \
13154 (a_, vcreate_u32 (UINT64_C (0x0))); \
13155 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
13157 : "w"(b_), "i"(c) \
13158 : /* No clobbers */); \
/* vrshrn_n_s16 (a, b): macro so that the shift count b reaches the asm
   through the "i" (immediate) constraint; a_ is an int16x8_t copy of a.
   Template: RSHRN (rounding shift right narrow), %1.8h -> %0.8b.
   NOTE(review): the immediate is written as plain %2 here, without the
   '#' prefix that the vrshrn_high_n_* templates use -- confirm the
   assembler accepts both spellings.  */
13162 #define vrshrn_n_s16(a, b) \
13165 int16x8_t a_ = (a); \
13167 __asm__ ("rshrn %0.8b,%1.8h,%2" \
13169 : "w"(a_), "i"(b) \
13170 : /* No clobbers */); \
/* vrshrn_n_s32 (a, b): macro so that b reaches the asm as an "i" immediate.
   a_ is an int32x4_t copy of a; result is an int16x4_t.  Template: RSHRN,
   %1.4s -> %0.4h.  NOTE(review): the shift operand is plain %2 with no
   '#' prefix, unlike the vrshrn_high_n_* templates.  */
13174 #define vrshrn_n_s32(a, b) \
13177 int32x4_t a_ = (a); \
13178 int16x4_t result; \
13179 __asm__ ("rshrn %0.4h,%1.4s,%2" \
13181 : "w"(a_), "i"(b) \
13182 : /* No clobbers */); \
/* vrshrn_n_s64 (a, b): macro so that b reaches the asm as an "i" immediate.
   a_ is an int64x2_t copy of a; result is an int32x2_t.  Template: RSHRN,
   %1.2d -> %0.2s.  NOTE(review): the shift operand is plain %2 with no
   '#' prefix, unlike the vrshrn_high_n_* templates.  */
13186 #define vrshrn_n_s64(a, b) \
13189 int64x2_t a_ = (a); \
13190 int32x2_t result; \
13191 __asm__ ("rshrn %0.2s,%1.2d,%2" \
13193 : "w"(a_), "i"(b) \
13194 : /* No clobbers */); \
/* vrshrn_n_u16 (a, b): macro so that b reaches the asm as an "i" immediate.
   a_ is a uint16x8_t copy of a; result is a uint8x8_t.  Template: RSHRN,
   %1.8h -> %0.8b.  NOTE(review): the shift operand is plain %2 with no
   '#' prefix, unlike the vrshrn_high_n_* templates.  */
13198 #define vrshrn_n_u16(a, b) \
13201 uint16x8_t a_ = (a); \
13202 uint8x8_t result; \
13203 __asm__ ("rshrn %0.8b,%1.8h,%2" \
13205 : "w"(a_), "i"(b) \
13206 : /* No clobbers */); \
/* vrshrn_n_u32 (a, b): macro so that b reaches the asm as an "i" immediate.
   a_ is a uint32x4_t copy of a; result is a uint16x4_t.  Template: RSHRN,
   %1.4s -> %0.4h.  NOTE(review): the shift operand is plain %2 with no
   '#' prefix, unlike the vrshrn_high_n_* templates.  */
13210 #define vrshrn_n_u32(a, b) \
13213 uint32x4_t a_ = (a); \
13214 uint16x4_t result; \
13215 __asm__ ("rshrn %0.4h,%1.4s,%2" \
13217 : "w"(a_), "i"(b) \
13218 : /* No clobbers */); \
/* vrshrn_n_u64 (a, b): macro so that b reaches the asm as an "i" immediate.
   a_ is a uint64x2_t copy of a; result is a uint32x2_t.  Template: RSHRN,
   %1.2d -> %0.2s.  NOTE(review): the shift operand is plain %2 with no
   '#' prefix, unlike the vrshrn_high_n_* templates.  */
13222 #define vrshrn_n_u64(a, b) \
13225 uint64x2_t a_ = (a); \
13226 uint32x2_t result; \
13227 __asm__ ("rshrn %0.2s,%1.2d,%2" \
13229 : "w"(a_), "i"(b) \
13230 : /* No clobbers */); \
/* vrsqrte_f32 (a): float32x2_t -> float32x2_t.  Template: FRSQRTE
   (floating-point reciprocal square root estimate) on .2s.  No clobbers.  */
13234 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13235 vrsqrte_f32 (float32x2_t a
)
13237 float32x2_t result
;
13238 __asm__ ("frsqrte %0.2s,%1.2s"
13241 : /* No clobbers */);
/* vrsqrte_f64 (a): float64x2_t -> float64x2_t.  Template: FRSQRTE on .2d.
   NOTE(review): despite the non-"q" name this takes and returns the
   128-bit float64x2_t and uses exactly the same template as
   vrsqrteq_f64 -- confirm whether a float64x1_t signature was intended.  */
13245 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13246 vrsqrte_f64 (float64x2_t a
)
13248 float64x2_t result
;
13249 __asm__ ("frsqrte %0.2d,%1.2d"
13252 : /* No clobbers */);
/* vrsqrte_u32 (a): uint32x2_t -> uint32x2_t.  Template: URSQRTE (unsigned
   reciprocal square root estimate) on .2s.  No clobbers.  */
13256 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13257 vrsqrte_u32 (uint32x2_t a
)
13260 __asm__ ("ursqrte %0.2s,%1.2s"
13263 : /* No clobbers */);
/* vrsqrted_f64 (a): scalar float64_t -> float64_t.  Template: scalar
   FRSQRTE with both operands printed as D registers (%d0, %d1).
   No clobbers.  */
13267 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13268 vrsqrted_f64 (float64_t a
)
13271 __asm__ ("frsqrte %d0,%d1"
13274 : /* No clobbers */);
/* vrsqrteq_f32 (a): float32x4_t -> float32x4_t.  Template: FRSQRTE
   (floating-point reciprocal square root estimate) on .4s.  No clobbers.  */
13278 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13279 vrsqrteq_f32 (float32x4_t a
)
13281 float32x4_t result
;
13282 __asm__ ("frsqrte %0.4s,%1.4s"
13285 : /* No clobbers */);
/* vrsqrteq_f64 (a): float64x2_t -> float64x2_t.  Template: FRSQRTE
   (floating-point reciprocal square root estimate) on .2d.  No clobbers.  */
13289 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13290 vrsqrteq_f64 (float64x2_t a
)
13292 float64x2_t result
;
13293 __asm__ ("frsqrte %0.2d,%1.2d"
13296 : /* No clobbers */);
/* vrsqrteq_u32 (a): uint32x4_t -> uint32x4_t.  Template: URSQRTE (unsigned
   reciprocal square root estimate) on .4s.  No clobbers.  */
13300 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13301 vrsqrteq_u32 (uint32x4_t a
)
13304 __asm__ ("ursqrte %0.4s,%1.4s"
13307 : /* No clobbers */);
/* vrsqrtes_f32 (a): scalar float32_t -> float32_t.  Template: scalar
   FRSQRTE with both operands printed as S registers (%s0, %s1).
   No clobbers.  */
13311 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13312 vrsqrtes_f32 (float32_t a
)
13315 __asm__ ("frsqrte %s0,%s1"
13318 : /* No clobbers */);
/* vrsqrts_f32 (a, b): float32x2_t operands and result.  Template: FRSQRTS
   (floating-point reciprocal square root step) on .2s.  No clobbers.  */
13322 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13323 vrsqrts_f32 (float32x2_t a
, float32x2_t b
)
13325 float32x2_t result
;
13326 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
13329 : /* No clobbers */);
/* vrsqrtsd_f64 (a, b): scalar float64_t operands and result.  Template:
   scalar FRSQRTS with all three operands printed as D registers.
   No clobbers.  */
13333 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13334 vrsqrtsd_f64 (float64_t a
, float64_t b
)
13337 __asm__ ("frsqrts %d0,%d1,%d2"
13340 : /* No clobbers */);
/* vrsqrtsq_f32 (a, b): float32x4_t operands and result.  Template: FRSQRTS
   (floating-point reciprocal square root step) on .4s.  No clobbers.  */
13344 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13345 vrsqrtsq_f32 (float32x4_t a
, float32x4_t b
)
13347 float32x4_t result
;
13348 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
13351 : /* No clobbers */);
/* vrsqrtsq_f64 (a, b): float64x2_t operands and result.  Template: FRSQRTS
   (floating-point reciprocal square root step) on .2d.  No clobbers.  */
13355 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13356 vrsqrtsq_f64 (float64x2_t a
, float64x2_t b
)
13358 float64x2_t result
;
13359 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13362 : /* No clobbers */);
/* vrsqrtss_f32 (a, b): scalar float32_t operands and result.  Template:
   scalar FRSQRTS with all three operands printed as S registers.
   No clobbers.  */
13366 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13367 vrsqrtss_f32 (float32_t a
, float32_t b
)
13370 __asm__ ("frsqrts %s0,%s1,%s2"
13373 : /* No clobbers */);
/* vrsrtsq_f64 (a, b): float64x2_t operands and result; FRSQRTS on .2d.
   NOTE(review): the name looks like a misspelling of vrsqrtsq_f64, which
   is defined with the same signature and the same asm template.  The
   spelling is left untouched here because existing callers may use it;
   confirm whether this duplicate should be kept or deprecated.  */
13377 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13378 vrsrtsq_f64 (float64x2_t a
, float64x2_t b
)
13380 float64x2_t result
;
13381 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13384 : /* No clobbers */);
/* vrsubhn_high_s16 (a, b, c): builds an int8x16_t from int8x8_t a and two
   int16x8_t operands.  `result' starts as vcombine_s8 (a, all-zero
   vector); the RSUBHN2 (rounding subtract returning high narrow, second
   part) template uses %0.16b as destination and %1.8h/%2.8h as sources.
   No clobbers.  */
13388 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13389 vrsubhn_high_s16 (int8x8_t a
, int16x8_t b
, int16x8_t c
)
13391 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
13392 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13395 : /* No clobbers */);
/* vrsubhn_high_s32 (a, b, c): builds an int16x8_t from int16x4_t a and two
   int32x4_t operands.  `result' starts as vcombine_s16 (a, all-zero
   vector); RSUBHN2 uses %0.8h as destination and %1.4s/%2.4s as sources.
   No clobbers.  */
13399 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13400 vrsubhn_high_s32 (int16x4_t a
, int32x4_t b
, int32x4_t c
)
13402 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
13403 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13406 : /* No clobbers */);
/* vrsubhn_high_s64 (a, b, c): builds an int32x4_t from int32x2_t a and two
   int64x2_t operands.  `result' starts as vcombine_s32 (a, all-zero
   vector); RSUBHN2 uses %0.4s as destination and %1.2d/%2.2d as sources.
   No clobbers.  */
13410 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13411 vrsubhn_high_s64 (int32x2_t a
, int64x2_t b
, int64x2_t c
)
13413 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
13414 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13417 : /* No clobbers */);
/* vrsubhn_high_u16 (a, b, c): builds a uint8x16_t from uint8x8_t a and two
   uint16x8_t operands.  `result' starts as vcombine_u8 (a, all-zero
   vector); RSUBHN2 uses %0.16b as destination and %1.8h/%2.8h as sources.
   No clobbers.  */
13421 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
13422 vrsubhn_high_u16 (uint8x8_t a
, uint16x8_t b
, uint16x8_t c
)
13424 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
13425 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13428 : /* No clobbers */);
/* vrsubhn_high_u32 (a, b, c): builds a uint16x8_t from uint16x4_t a and two
   uint32x4_t operands.  `result' starts as vcombine_u16 (a, all-zero
   vector); RSUBHN2 uses %0.8h as destination and %1.4s/%2.4s as sources.
   No clobbers.  */
13432 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13433 vrsubhn_high_u32 (uint16x4_t a
, uint32x4_t b
, uint32x4_t c
)
13435 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
13436 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13439 : /* No clobbers */);
/* vrsubhn_high_u64 (a, b, c): builds a uint32x4_t from uint32x2_t a and two
   uint64x2_t operands.  `result' starts as vcombine_u32 (a, all-zero
   vector); RSUBHN2 uses %0.4s as destination and %1.2d/%2.2d as sources.
   No clobbers.  */
13443 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13444 vrsubhn_high_u64 (uint32x2_t a
, uint64x2_t b
, uint64x2_t c
)
13446 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
13447 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13450 : /* No clobbers */);
/* vrsubhn_s16 (a, b): two int16x8_t operands, int8x8_t result.  Template:
   RSUBHN (rounding subtract returning high narrow), %1.8h/%2.8h -> %0.8b.
   No clobbers.  */
13454 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
13455 vrsubhn_s16 (int16x8_t a
, int16x8_t b
)
13458 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13461 : /* No clobbers */);
/* vrsubhn_s32 (a, b): two int32x4_t operands, int16x4_t result.  Template:
   RSUBHN, %1.4s/%2.4s -> %0.4h.  No clobbers.  */
13465 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13466 vrsubhn_s32 (int32x4_t a
, int32x4_t b
)
13469 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13472 : /* No clobbers */);
/* vrsubhn_s64 (a, b): two int64x2_t operands, int32x2_t result.  Template:
   RSUBHN, %1.2d/%2.2d -> %0.2s.  No clobbers.  */
13476 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13477 vrsubhn_s64 (int64x2_t a
, int64x2_t b
)
13480 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13483 : /* No clobbers */);
/* vrsubhn_u16 (a, b): two uint16x8_t operands, uint8x8_t result.  Template:
   RSUBHN, %1.8h/%2.8h -> %0.8b (same mnemonic as the signed variant).
   No clobbers.  */
13487 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
13488 vrsubhn_u16 (uint16x8_t a
, uint16x8_t b
)
13491 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13494 : /* No clobbers */);
/* vrsubhn_u32 (a, b): two uint32x4_t operands, uint16x4_t result.
   Template: RSUBHN, %1.4s/%2.4s -> %0.4h.  No clobbers.  */
13498 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13499 vrsubhn_u32 (uint32x4_t a
, uint32x4_t b
)
13502 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13505 : /* No clobbers */);
/* vrsubhn_u64 (a, b): two uint64x2_t operands, uint32x2_t result.
   Template: RSUBHN, %1.2d/%2.2d -> %0.2s.  No clobbers.  */
13509 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13510 vrsubhn_u64 (uint64x2_t a
, uint64x2_t b
)
13513 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13516 : /* No clobbers */);
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vset_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Yield 128-bit vector B with lane C (an integer constant) replaced by scalar A (INS).  */
#define vsetq_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                            (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                            (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow, high half (SHRN2): low half of the result is A; high half is each lane of B shifted right by constant C and truncated.  */
#define vshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                            (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right narrow (SHRN): shift each lane of A right by constant B and truncate it to half width.  */
#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Shift left and insert (SLI): each lane of B shifted left by constant C is inserted into A, keeping the low C bits of A's lane.  */
#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift left and insert (SLI): each lane of B shifted left by constant C is inserted into A, keeping the low C bits of A's lane.  */
#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift left and insert (SLI): each lane of B shifted left by constant C is inserted into A, keeping the low C bits of A's lane.  */
#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift left and insert (SLI): each lane of B shifted left by constant C is inserted into A, keeping the low C bits of A's lane.  */
#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right and insert (SRI): each lane of B shifted right by constant C is inserted into A, keeping the top C bits of A's lane.  */
#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right and insert (SRI): each lane of B shifted right by constant C is inserted into A, keeping the top C bits of A's lane.  */
#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right and insert (SRI): each lane of B shifted right by constant C is inserted into A, keeping the top C bits of A's lane.  */
#define vsriq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sri %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Shift right and insert (SRI): each lane of B shifted right by constant C is inserted into A, keeping the top C bits of A's lane.  */
#define vsriq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sri %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* Store lane C (an integer constant) of 128-bit vector B to *A (ST1 single lane); the "memory" clobber marks the store.  */
#define vst1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
14357 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14358 vsubhn_high_s16 (int8x8_t a
, int16x8_t b
, int16x8_t c
)
14360 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
14361 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14364 : /* No clobbers */);
14368 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14369 vsubhn_high_s32 (int16x4_t a
, int32x4_t b
, int32x4_t c
)
14371 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
14372 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14375 : /* No clobbers */);
14379 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14380 vsubhn_high_s64 (int32x2_t a
, int64x2_t b
, int64x2_t c
)
14382 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
14383 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14386 : /* No clobbers */);
14390 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14391 vsubhn_high_u16 (uint8x8_t a
, uint16x8_t b
, uint16x8_t c
)
14393 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
14394 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14397 : /* No clobbers */);
14401 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14402 vsubhn_high_u32 (uint16x4_t a
, uint32x4_t b
, uint32x4_t c
)
14404 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
14405 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14408 : /* No clobbers */);
14412 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14413 vsubhn_high_u64 (uint32x2_t a
, uint64x2_t b
, uint64x2_t c
)
14415 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
14416 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14419 : /* No clobbers */);
14423 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14424 vsubhn_s16 (int16x8_t a
, int16x8_t b
)
14427 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14430 : /* No clobbers */);
14434 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14435 vsubhn_s32 (int32x4_t a
, int32x4_t b
)
14438 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14441 : /* No clobbers */);
14445 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14446 vsubhn_s64 (int64x2_t a
, int64x2_t b
)
14449 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14452 : /* No clobbers */);
14456 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14457 vsubhn_u16 (uint16x8_t a
, uint16x8_t b
)
14460 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14463 : /* No clobbers */);
14467 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14468 vsubhn_u32 (uint32x4_t a
, uint32x4_t b
)
14471 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14474 : /* No clobbers */);
14478 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
14479 vsubhn_u64 (uint64x2_t a
, uint64x2_t b
)
14482 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14485 : /* No clobbers */);
14489 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14490 vtrn1_f32 (float32x2_t a
, float32x2_t b
)
14492 float32x2_t result
;
14493 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14496 : /* No clobbers */);
14500 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
14501 vtrn1_p8 (poly8x8_t a
, poly8x8_t b
)
14504 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14507 : /* No clobbers */);
14511 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
14512 vtrn1_p16 (poly16x4_t a
, poly16x4_t b
)
14515 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14518 : /* No clobbers */);
14522 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14523 vtrn1_s8 (int8x8_t a
, int8x8_t b
)
14526 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14529 : /* No clobbers */);
14533 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14534 vtrn1_s16 (int16x4_t a
, int16x4_t b
)
14537 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14540 : /* No clobbers */);
14544 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14545 vtrn1_s32 (int32x2_t a
, int32x2_t b
)
14548 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14551 : /* No clobbers */);
14555 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14556 vtrn1_u8 (uint8x8_t a
, uint8x8_t b
)
14559 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14562 : /* No clobbers */);
14566 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14567 vtrn1_u16 (uint16x4_t a
, uint16x4_t b
)
14570 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14573 : /* No clobbers */);
14577 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
14578 vtrn1_u32 (uint32x2_t a
, uint32x2_t b
)
14581 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14584 : /* No clobbers */);
14588 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
14589 vtrn1q_f32 (float32x4_t a
, float32x4_t b
)
14591 float32x4_t result
;
14592 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14595 : /* No clobbers */);
14599 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
14600 vtrn1q_f64 (float64x2_t a
, float64x2_t b
)
14602 float64x2_t result
;
14603 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14606 : /* No clobbers */);
14610 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
14611 vtrn1q_p8 (poly8x16_t a
, poly8x16_t b
)
14614 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14617 : /* No clobbers */);
14621 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
14622 vtrn1q_p16 (poly16x8_t a
, poly16x8_t b
)
14625 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14628 : /* No clobbers */);
14632 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14633 vtrn1q_s8 (int8x16_t a
, int8x16_t b
)
14636 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14639 : /* No clobbers */);
14643 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14644 vtrn1q_s16 (int16x8_t a
, int16x8_t b
)
14647 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14650 : /* No clobbers */);
14654 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14655 vtrn1q_s32 (int32x4_t a
, int32x4_t b
)
14658 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14661 : /* No clobbers */);
14665 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
14666 vtrn1q_s64 (int64x2_t a
, int64x2_t b
)
14669 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14672 : /* No clobbers */);
14676 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14677 vtrn1q_u8 (uint8x16_t a
, uint8x16_t b
)
14680 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14683 : /* No clobbers */);
14687 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14688 vtrn1q_u16 (uint16x8_t a
, uint16x8_t b
)
14691 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14694 : /* No clobbers */);
14698 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14699 vtrn1q_u32 (uint32x4_t a
, uint32x4_t b
)
14702 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14705 : /* No clobbers */);
14709 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
14710 vtrn1q_u64 (uint64x2_t a
, uint64x2_t b
)
14713 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14716 : /* No clobbers */);
14720 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14721 vtrn2_f32 (float32x2_t a
, float32x2_t b
)
14723 float32x2_t result
;
14724 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14727 : /* No clobbers */);
14731 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
14732 vtrn2_p8 (poly8x8_t a
, poly8x8_t b
)
14735 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14738 : /* No clobbers */);
14742 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
14743 vtrn2_p16 (poly16x4_t a
, poly16x4_t b
)
14746 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14749 : /* No clobbers */);
14753 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14754 vtrn2_s8 (int8x8_t a
, int8x8_t b
)
14757 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14760 : /* No clobbers */);
14764 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14765 vtrn2_s16 (int16x4_t a
, int16x4_t b
)
14768 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14771 : /* No clobbers */);
14775 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14776 vtrn2_s32 (int32x2_t a
, int32x2_t b
)
14779 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14782 : /* No clobbers */);
14786 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14787 vtrn2_u8 (uint8x8_t a
, uint8x8_t b
)
14790 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14793 : /* No clobbers */);
14797 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14798 vtrn2_u16 (uint16x4_t a
, uint16x4_t b
)
14801 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14804 : /* No clobbers */);
14808 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
14809 vtrn2_u32 (uint32x2_t a
, uint32x2_t b
)
14812 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14815 : /* No clobbers */);
14819 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
14820 vtrn2q_f32 (float32x4_t a
, float32x4_t b
)
14822 float32x4_t result
;
14823 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14826 : /* No clobbers */);
14830 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
14831 vtrn2q_f64 (float64x2_t a
, float64x2_t b
)
14833 float64x2_t result
;
14834 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14837 : /* No clobbers */);
14841 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
14842 vtrn2q_p8 (poly8x16_t a
, poly8x16_t b
)
14845 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14848 : /* No clobbers */);
14852 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
14853 vtrn2q_p16 (poly16x8_t a
, poly16x8_t b
)
14856 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14859 : /* No clobbers */);
14863 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14864 vtrn2q_s8 (int8x16_t a
, int8x16_t b
)
14867 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14870 : /* No clobbers */);
14874 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14875 vtrn2q_s16 (int16x8_t a
, int16x8_t b
)
14878 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14881 : /* No clobbers */);
14885 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14886 vtrn2q_s32 (int32x4_t a
, int32x4_t b
)
14889 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14892 : /* No clobbers */);
14896 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
14897 vtrn2q_s64 (int64x2_t a
, int64x2_t b
)
14900 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14903 : /* No clobbers */);
14907 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14908 vtrn2q_u8 (uint8x16_t a
, uint8x16_t b
)
14911 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14914 : /* No clobbers */);
14918 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14919 vtrn2q_u16 (uint16x8_t a
, uint16x8_t b
)
14922 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14925 : /* No clobbers */);
14929 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14930 vtrn2q_u32 (uint32x4_t a
, uint32x4_t b
)
14933 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14936 : /* No clobbers */);
14940 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
14941 vtrn2q_u64 (uint64x2_t a
, uint64x2_t b
)
14944 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14947 : /* No clobbers */);
14951 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14952 vtst_p8 (poly8x8_t a
, poly8x8_t b
)
14955 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
14958 : /* No clobbers */);
14962 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14963 vtst_p16 (poly16x4_t a
, poly16x4_t b
)
14966 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
14969 : /* No clobbers */);
14973 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14974 vtstq_p8 (poly8x16_t a
, poly8x16_t b
)
14977 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
14980 : /* No clobbers */);
14984 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14985 vtstq_p16 (poly16x8_t a
, poly16x8_t b
)
14988 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
14991 : /* No clobbers */);
14994 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14995 vuzp1_f32 (float32x2_t a
, float32x2_t b
)
14997 float32x2_t result
;
14998 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15001 : /* No clobbers */);
15005 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
15006 vuzp1_p8 (poly8x8_t a
, poly8x8_t b
)
15009 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15012 : /* No clobbers */);
15016 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
15017 vuzp1_p16 (poly16x4_t a
, poly16x4_t b
)
15020 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15023 : /* No clobbers */);
15027 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
15028 vuzp1_s8 (int8x8_t a
, int8x8_t b
)
15031 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15034 : /* No clobbers */);
15038 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
15039 vuzp1_s16 (int16x4_t a
, int16x4_t b
)
15042 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15045 : /* No clobbers */);
15049 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
15050 vuzp1_s32 (int32x2_t a
, int32x2_t b
)
15053 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15056 : /* No clobbers */);
15060 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
15061 vuzp1_u8 (uint8x8_t a
, uint8x8_t b
)
15064 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15067 : /* No clobbers */);
15071 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
15072 vuzp1_u16 (uint16x4_t a
, uint16x4_t b
)
15075 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15078 : /* No clobbers */);
15082 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15083 vuzp1_u32 (uint32x2_t a
, uint32x2_t b
)
15086 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15089 : /* No clobbers */);
15093 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15094 vuzp1q_f32 (float32x4_t a
, float32x4_t b
)
15096 float32x4_t result
;
15097 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15100 : /* No clobbers */);
15104 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15105 vuzp1q_f64 (float64x2_t a
, float64x2_t b
)
15107 float64x2_t result
;
15108 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15111 : /* No clobbers */);
15115 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
15116 vuzp1q_p8 (poly8x16_t a
, poly8x16_t b
)
15119 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15122 : /* No clobbers */);
15126 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
15127 vuzp1q_p16 (poly16x8_t a
, poly16x8_t b
)
15130 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15133 : /* No clobbers */);
15137 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
15138 vuzp1q_s8 (int8x16_t a
, int8x16_t b
)
15141 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15144 : /* No clobbers */);
15148 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
15149 vuzp1q_s16 (int16x8_t a
, int16x8_t b
)
15152 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15155 : /* No clobbers */);
15159 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
15160 vuzp1q_s32 (int32x4_t a
, int32x4_t b
)
15163 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15166 : /* No clobbers */);
15170 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
15171 vuzp1q_s64 (int64x2_t a
, int64x2_t b
)
15174 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15177 : /* No clobbers */);
15181 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
15182 vuzp1q_u8 (uint8x16_t a
, uint8x16_t b
)
15185 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15188 : /* No clobbers */);
15192 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
15193 vuzp1q_u16 (uint16x8_t a
, uint16x8_t b
)
15196 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15199 : /* No clobbers */);
15203 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15204 vuzp1q_u32 (uint32x4_t a
, uint32x4_t b
)
15207 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15210 : /* No clobbers */);
15214 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
15215 vuzp1q_u64 (uint64x2_t a
, uint64x2_t b
)
15218 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15221 : /* No clobbers */);
15225 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
15226 vuzp2_f32 (float32x2_t a
, float32x2_t b
)
15228 float32x2_t result
;
15229 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15232 : /* No clobbers */);
15236 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
15237 vuzp2_p8 (poly8x8_t a
, poly8x8_t b
)
15240 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15243 : /* No clobbers */);
15247 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
15248 vuzp2_p16 (poly16x4_t a
, poly16x4_t b
)
15251 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15254 : /* No clobbers */);
15258 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
15259 vuzp2_s8 (int8x8_t a
, int8x8_t b
)
15262 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15265 : /* No clobbers */);
15269 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
15270 vuzp2_s16 (int16x4_t a
, int16x4_t b
)
15273 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15276 : /* No clobbers */);
15280 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
15281 vuzp2_s32 (int32x2_t a
, int32x2_t b
)
15284 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15287 : /* No clobbers */);
15291 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
15292 vuzp2_u8 (uint8x8_t a
, uint8x8_t b
)
15295 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15298 : /* No clobbers */);
15302 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
15303 vuzp2_u16 (uint16x4_t a
, uint16x4_t b
)
15306 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15309 : /* No clobbers */);
15313 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15314 vuzp2_u32 (uint32x2_t a
, uint32x2_t b
)
15317 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15320 : /* No clobbers */);
15324 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15325 vuzp2q_f32 (float32x4_t a
, float32x4_t b
)
15327 float32x4_t result
;
15328 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15331 : /* No clobbers */);
15335 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15336 vuzp2q_f64 (float64x2_t a
, float64x2_t b
)
15338 float64x2_t result
;
15339 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15342 : /* No clobbers */);
15346 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
15347 vuzp2q_p8 (poly8x16_t a
, poly8x16_t b
)
15350 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15353 : /* No clobbers */);
15357 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
15358 vuzp2q_p16 (poly16x8_t a
, poly16x8_t b
)
15361 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15364 : /* No clobbers */);
15368 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
15369 vuzp2q_s8 (int8x16_t a
, int8x16_t b
)
15372 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15375 : /* No clobbers */);
15379 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
15380 vuzp2q_s16 (int16x8_t a
, int16x8_t b
)
15383 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15386 : /* No clobbers */);
15390 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
15391 vuzp2q_s32 (int32x4_t a
, int32x4_t b
)
15394 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15397 : /* No clobbers */);
15401 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
15402 vuzp2q_s64 (int64x2_t a
, int64x2_t b
)
15405 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15408 : /* No clobbers */);
15412 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
15413 vuzp2q_u8 (uint8x16_t a
, uint8x16_t b
)
15416 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15419 : /* No clobbers */);
15423 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
15424 vuzp2q_u16 (uint16x8_t a
, uint16x8_t b
)
15427 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15430 : /* No clobbers */);
15434 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15435 vuzp2q_u32 (uint32x4_t a
, uint32x4_t b
)
15438 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15441 : /* No clobbers */);
15445 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
15446 vuzp2q_u64 (uint64x2_t a
, uint64x2_t b
)
15449 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15452 : /* No clobbers */);
15456 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
15457 vzip1_f32 (float32x2_t a
, float32x2_t b
)
15459 float32x2_t result
;
15460 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15463 : /* No clobbers */);
15467 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
15468 vzip1_p8 (poly8x8_t a
, poly8x8_t b
)
15471 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15474 : /* No clobbers */);
15478 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
15479 vzip1_p16 (poly16x4_t a
, poly16x4_t b
)
15482 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15485 : /* No clobbers */);
15489 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
15490 vzip1_s8 (int8x8_t a
, int8x8_t b
)
15493 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15496 : /* No clobbers */);
15500 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
15501 vzip1_s16 (int16x4_t a
, int16x4_t b
)
15504 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15507 : /* No clobbers */);
15511 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
15512 vzip1_s32 (int32x2_t a
, int32x2_t b
)
15515 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15518 : /* No clobbers */);
15522 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
15523 vzip1_u8 (uint8x8_t a
, uint8x8_t b
)
15526 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15529 : /* No clobbers */);
15533 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
15534 vzip1_u16 (uint16x4_t a
, uint16x4_t b
)
15537 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15540 : /* No clobbers */);
15544 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15545 vzip1_u32 (uint32x2_t a
, uint32x2_t b
)
15548 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15551 : /* No clobbers */);
15555 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15556 vzip1q_f32 (float32x4_t a
, float32x4_t b
)
15558 float32x4_t result
;
15559 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15562 : /* No clobbers */);
15566 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15567 vzip1q_f64 (float64x2_t a
, float64x2_t b
)
15569 float64x2_t result
;
15570 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15573 : /* No clobbers */);
15577 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
15578 vzip1q_p8 (poly8x16_t a
, poly8x16_t b
)
15581 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15584 : /* No clobbers */);
15588 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
15589 vzip1q_p16 (poly16x8_t a
, poly16x8_t b
)
15592 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15595 : /* No clobbers */);
15599 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
15600 vzip1q_s8 (int8x16_t a
, int8x16_t b
)
15603 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15606 : /* No clobbers */);
15610 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
15611 vzip1q_s16 (int16x8_t a
, int16x8_t b
)
15614 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15617 : /* No clobbers */);
15621 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
15622 vzip1q_s32 (int32x4_t a
, int32x4_t b
)
15625 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15628 : /* No clobbers */);
15632 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
15633 vzip1q_s64 (int64x2_t a
, int64x2_t b
)
15636 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15639 : /* No clobbers */);
15643 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
15644 vzip1q_u8 (uint8x16_t a
, uint8x16_t b
)
15647 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15650 : /* No clobbers */);
15654 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
15655 vzip1q_u16 (uint16x8_t a
, uint16x8_t b
)
15658 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15661 : /* No clobbers */);
15665 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15666 vzip1q_u32 (uint32x4_t a
, uint32x4_t b
)
15669 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15672 : /* No clobbers */);
15676 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
15677 vzip1q_u64 (uint64x2_t a
, uint64x2_t b
)
15680 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15683 : /* No clobbers */);
15687 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
15688 vzip2_f32 (float32x2_t a
, float32x2_t b
)
15690 float32x2_t result
;
15691 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15694 : /* No clobbers */);
15698 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
15699 vzip2_p8 (poly8x8_t a
, poly8x8_t b
)
15702 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15705 : /* No clobbers */);
15709 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
15710 vzip2_p16 (poly16x4_t a
, poly16x4_t b
)
15713 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15716 : /* No clobbers */);
15720 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
15721 vzip2_s8 (int8x8_t a
, int8x8_t b
)
15724 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15727 : /* No clobbers */);
15731 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
15732 vzip2_s16 (int16x4_t a
, int16x4_t b
)
15735 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15738 : /* No clobbers */);
15742 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
15743 vzip2_s32 (int32x2_t a
, int32x2_t b
)
15746 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15749 : /* No clobbers */);
15753 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
15754 vzip2_u8 (uint8x8_t a
, uint8x8_t b
)
15757 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15760 : /* No clobbers */);
15764 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
15765 vzip2_u16 (uint16x4_t a
, uint16x4_t b
)
15768 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15771 : /* No clobbers */);
15775 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15776 vzip2_u32 (uint32x2_t a
, uint32x2_t b
)
15779 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15782 : /* No clobbers */);
15786 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15787 vzip2q_f32 (float32x4_t a
, float32x4_t b
)
15789 float32x4_t result
;
15790 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15793 : /* No clobbers */);
15797 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15798 vzip2q_f64 (float64x2_t a
, float64x2_t b
)
15800 float64x2_t result
;
15801 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15804 : /* No clobbers */);
15808 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
15809 vzip2q_p8 (poly8x16_t a
, poly8x16_t b
)
15812 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15815 : /* No clobbers */);
15819 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
15820 vzip2q_p16 (poly16x8_t a
, poly16x8_t b
)
15823 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15826 : /* No clobbers */);
15830 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
15831 vzip2q_s8 (int8x16_t a
, int8x16_t b
)
15834 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15837 : /* No clobbers */);
15841 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
15842 vzip2q_s16 (int16x8_t a
, int16x8_t b
)
15845 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15848 : /* No clobbers */);
15852 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
15853 vzip2q_s32 (int32x4_t a
, int32x4_t b
)
15856 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15859 : /* No clobbers */);
15863 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
15864 vzip2q_s64 (int64x2_t a
, int64x2_t b
)
15867 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15870 : /* No clobbers */);
15874 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
15875 vzip2q_u8 (uint8x16_t a
, uint8x16_t b
)
15878 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15881 : /* No clobbers */);
15885 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
15886 vzip2q_u16 (uint16x8_t a
, uint16x8_t b
)
15889 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15892 : /* No clobbers */);
15896 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15897 vzip2q_u32 (uint32x4_t a
, uint32x4_t b
)
15900 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15903 : /* No clobbers */);
15907 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
15908 vzip2q_u64 (uint64x2_t a
, uint64x2_t b
)
15911 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15914 : /* No clobbers */);
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */
/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   These are required for casting memory references.  */
/* Declare struct <t><sz>x<nelem>_t holding NELEM scalars of type
   <t><sz>_t in its VAL array.  The expansion already ends in ';', so
   instantiations are written without a trailing semicolon.  */
#define __STRUCTN(t, sz, nelem)                 \
  typedef struct t ## sz ## x ## nelem ## _t {  \
    t ## sz ## _t val[nelem];                   \
  } t ## sz ## x ## nelem ## _t;
15972 /* 2-element structs. */
15973 __STRUCTN (int, 8, 2)
15974 __STRUCTN (int, 16, 2)
15975 __STRUCTN (uint
, 8, 2)
15976 __STRUCTN (uint
, 16, 2)
15977 __STRUCTN (poly
, 8, 2)
15978 __STRUCTN (poly
, 16, 2)
15979 /* 3-element structs. */
15980 __STRUCTN (int, 8, 3)
15981 __STRUCTN (int, 16, 3)
15982 __STRUCTN (int, 32, 3)
15983 __STRUCTN (int, 64, 3)
15984 __STRUCTN (uint
, 8, 3)
15985 __STRUCTN (uint
, 16, 3)
15986 __STRUCTN (uint
, 32, 3)
15987 __STRUCTN (uint
, 64, 3)
15988 __STRUCTN (float, 32, 3)
15989 __STRUCTN (float, 64, 3)
15990 __STRUCTN (poly
, 8, 3)
15991 __STRUCTN (poly
, 16, 3)
15992 /* 4-element structs. */
15993 __STRUCTN (int, 8, 4)
15994 __STRUCTN (int, 64, 4)
15995 __STRUCTN (uint
, 8, 4)
15996 __STRUCTN (uint
, 64, 4)
15997 __STRUCTN (poly
, 8, 4)
15998 __STRUCTN (float, 64, 4)
/* Define vld2[q]_dup_<funcsuffix> (const ptrtype *ptr).
   LD2R loads one 2-element structure from *PTR and replicates it across
   v16/v17; ST1 then writes both registers into the returned RETTYPE.
   STRUCTTYPE is used only to give the memory input operand its extent.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,                       \
                    regsuffix, funcsuffix, Q)                           \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)                 \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"    \
             "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"     \
             : "=Q"(result)                                             \
             : "Q"(*(const structtype *)ptr)                            \
             : "memory", "v16", "v17");                                 \
    return result;                                                      \
  }
16016 __LD2R_FUNC (float32x2x2_t
, float32x2_t
, float32_t
, 2s
, f32
,)
16017 __LD2R_FUNC (float64x1x2_t
, float64x2_t
, float64_t
, 1d
, f64
,)
16018 __LD2R_FUNC (poly8x8x2_t
, poly8x2_t
, poly8_t
, 8b
, p8
,)
16019 __LD2R_FUNC (poly16x4x2_t
, poly16x2_t
, poly16_t
, 4h
, p16
,)
16020 __LD2R_FUNC (int8x8x2_t
, int8x2_t
, int8_t, 8b
, s8
,)
16021 __LD2R_FUNC (int16x4x2_t
, int16x2_t
, int16_t, 4h
, s16
,)
16022 __LD2R_FUNC (int32x2x2_t
, int32x2_t
, int32_t, 2s
, s32
,)
16023 __LD2R_FUNC (int64x1x2_t
, int64x2_t
, int64_t, 1d
, s64
,)
16024 __LD2R_FUNC (uint8x8x2_t
, uint8x2_t
, uint8_t, 8b
, u8
,)
16025 __LD2R_FUNC (uint16x4x2_t
, uint16x2_t
, uint16_t, 4h
, u16
,)
16026 __LD2R_FUNC (uint32x2x2_t
, uint32x2_t
, uint32_t, 2s
, u32
,)
16027 __LD2R_FUNC (uint64x1x2_t
, uint64x2_t
, uint64_t, 1d
, u64
,)
16028 __LD2R_FUNC (float32x4x2_t
, float32x2_t
, float32_t
, 4s
, f32
, q
)
16029 __LD2R_FUNC (float64x2x2_t
, float64x2_t
, float64_t
, 2d
, f64
, q
)
16030 __LD2R_FUNC (poly8x16x2_t
, poly8x2_t
, poly8_t
, 16b
, p8
, q
)
16031 __LD2R_FUNC (poly16x8x2_t
, poly16x2_t
, poly16_t
, 8h
, p16
, q
)
16032 __LD2R_FUNC (int8x16x2_t
, int8x2_t
, int8_t, 16b
, s8
, q
)
16033 __LD2R_FUNC (int16x8x2_t
, int16x2_t
, int16_t, 8h
, s16
, q
)
16034 __LD2R_FUNC (int32x4x2_t
, int32x2_t
, int32_t, 4s
, s32
, q
)
16035 __LD2R_FUNC (int64x2x2_t
, int64x2_t
, int64_t, 2d
, s64
, q
)
16036 __LD2R_FUNC (uint8x16x2_t
, uint8x2_t
, uint8_t, 16b
, u8
, q
)
16037 __LD2R_FUNC (uint16x8x2_t
, uint16x2_t
, uint16_t, 8h
, u16
, q
)
16038 __LD2R_FUNC (uint32x4x2_t
, uint32x2_t
, uint32_t, 4s
, u32
, q
)
16039 __LD2R_FUNC (uint64x2x2_t
, uint64x2_t
, uint64_t, 2d
, u64
, q
)
/* Define vld2[q]_lane_<funcsuffix> (const ptrtype *ptr, rettype b,
   const int c).
   LD1 preloads v16/v17 from B, LD2 then overwrites lane C of each
   register from *PTR, and ST1 writes both registers into the result.
   C is passed with an "i" constraint, so it must be an immediate.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,                    \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     rettype b, const int c)            \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"     \
             "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"   \
             "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"     \
             : "=Q"(result)                                             \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)               \
             : "memory", "v16", "v17");                                 \
    return result;                                                      \
  }
16058 __LD2_LANE_FUNC (int8x8x2_t
, uint8_t, 8b
, b
, s8
,)
16059 __LD2_LANE_FUNC (float32x2x2_t
, float32_t
, 2s
, s
, f32
,)
16060 __LD2_LANE_FUNC (float64x1x2_t
, float64_t
, 1d
, d
, f64
,)
16061 __LD2_LANE_FUNC (poly8x8x2_t
, poly8_t
, 8b
, b
, p8
,)
16062 __LD2_LANE_FUNC (poly16x4x2_t
, poly16_t
, 4h
, h
, p16
,)
16063 __LD2_LANE_FUNC (int16x4x2_t
, int16_t, 4h
, h
, s16
,)
16064 __LD2_LANE_FUNC (int32x2x2_t
, int32_t, 2s
, s
, s32
,)
16065 __LD2_LANE_FUNC (int64x1x2_t
, int64_t, 1d
, d
, s64
,)
16066 __LD2_LANE_FUNC (uint8x8x2_t
, uint8_t, 8b
, b
, u8
,)
16067 __LD2_LANE_FUNC (uint16x4x2_t
, uint16_t, 4h
, h
, u16
,)
16068 __LD2_LANE_FUNC (uint32x2x2_t
, uint32_t, 2s
, s
, u32
,)
16069 __LD2_LANE_FUNC (uint64x1x2_t
, uint64_t, 1d
, d
, u64
,)
16070 __LD2_LANE_FUNC (float32x4x2_t
, float32_t
, 4s
, s
, f32
, q
)
16071 __LD2_LANE_FUNC (float64x2x2_t
, float64_t
, 2d
, d
, f64
, q
)
16072 __LD2_LANE_FUNC (poly8x16x2_t
, poly8_t
, 16b
, b
, p8
, q
)
16073 __LD2_LANE_FUNC (poly16x8x2_t
, poly16_t
, 8h
, h
, p16
, q
)
16074 __LD2_LANE_FUNC (int8x16x2_t
, int8_t, 16b
, b
, s8
, q
)
16075 __LD2_LANE_FUNC (int16x8x2_t
, int16_t, 8h
, h
, s16
, q
)
16076 __LD2_LANE_FUNC (int32x4x2_t
, int32_t, 4s
, s
, s32
, q
)
16077 __LD2_LANE_FUNC (int64x2x2_t
, int64_t, 2d
, d
, s64
, q
)
16078 __LD2_LANE_FUNC (uint8x16x2_t
, uint8_t, 16b
, b
, u8
, q
)
16079 __LD2_LANE_FUNC (uint16x8x2_t
, uint16_t, 8h
, h
, u16
, q
)
16080 __LD2_LANE_FUNC (uint32x4x2_t
, uint32_t, 4s
, s
, u32
, q
)
16081 __LD2_LANE_FUNC (uint64x2x2_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vld3[q]_dup_<funcsuffix> (const ptrtype *ptr).
   LD3R loads one 3-element structure from *PTR and replicates it across
   v16-v18; ST1 then writes the three registers into the returned RETTYPE.
   STRUCTTYPE is used only to give the memory input operand its extent.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,                       \
                    regsuffix, funcsuffix, Q)                           \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)                 \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"   \
             "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(*(const structtype *)ptr)                            \
             : "memory", "v16", "v17", "v18");                          \
    return result;                                                      \
  }
16098 __LD3R_FUNC (float32x2x3_t
, float32x3_t
, float32_t
, 2s
, f32
,)
16099 __LD3R_FUNC (float64x1x3_t
, float64x3_t
, float64_t
, 1d
, f64
,)
16100 __LD3R_FUNC (poly8x8x3_t
, poly8x3_t
, poly8_t
, 8b
, p8
,)
16101 __LD3R_FUNC (poly16x4x3_t
, poly16x3_t
, poly16_t
, 4h
, p16
,)
16102 __LD3R_FUNC (int8x8x3_t
, int8x3_t
, int8_t, 8b
, s8
,)
16103 __LD3R_FUNC (int16x4x3_t
, int16x3_t
, int16_t, 4h
, s16
,)
16104 __LD3R_FUNC (int32x2x3_t
, int32x3_t
, int32_t, 2s
, s32
,)
16105 __LD3R_FUNC (int64x1x3_t
, int64x3_t
, int64_t, 1d
, s64
,)
16106 __LD3R_FUNC (uint8x8x3_t
, uint8x3_t
, uint8_t, 8b
, u8
,)
16107 __LD3R_FUNC (uint16x4x3_t
, uint16x3_t
, uint16_t, 4h
, u16
,)
16108 __LD3R_FUNC (uint32x2x3_t
, uint32x3_t
, uint32_t, 2s
, u32
,)
16109 __LD3R_FUNC (uint64x1x3_t
, uint64x3_t
, uint64_t, 1d
, u64
,)
16110 __LD3R_FUNC (float32x4x3_t
, float32x3_t
, float32_t
, 4s
, f32
, q
)
16111 __LD3R_FUNC (float64x2x3_t
, float64x3_t
, float64_t
, 2d
, f64
, q
)
16112 __LD3R_FUNC (poly8x16x3_t
, poly8x3_t
, poly8_t
, 16b
, p8
, q
)
16113 __LD3R_FUNC (poly16x8x3_t
, poly16x3_t
, poly16_t
, 8h
, p16
, q
)
16114 __LD3R_FUNC (int8x16x3_t
, int8x3_t
, int8_t, 16b
, s8
, q
)
16115 __LD3R_FUNC (int16x8x3_t
, int16x3_t
, int16_t, 8h
, s16
, q
)
16116 __LD3R_FUNC (int32x4x3_t
, int32x3_t
, int32_t, 4s
, s32
, q
)
16117 __LD3R_FUNC (int64x2x3_t
, int64x3_t
, int64_t, 2d
, s64
, q
)
16118 __LD3R_FUNC (uint8x16x3_t
, uint8x3_t
, uint8_t, 16b
, u8
, q
)
16119 __LD3R_FUNC (uint16x8x3_t
, uint16x3_t
, uint16_t, 8h
, u16
, q
)
16120 __LD3R_FUNC (uint32x4x3_t
, uint32x3_t
, uint32_t, 4s
, u32
, q
)
16121 __LD3R_FUNC (uint64x2x3_t
, uint64x3_t
, uint64_t, 2d
, u64
, q
)
/* Define vld3[q]_lane_<funcsuffix> (const ptrtype *ptr, rettype b,
   const int c).
   LD1 preloads v16-v18 from B, LD3 then overwrites lane C of each
   register from *PTR, and ST1 writes the registers into the result.
   C is passed with an "i" constraint, so it must be an immediate.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,                    \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     rettype b, const int c)            \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"    \
             "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"  \
             "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)               \
             : "memory", "v16", "v17", "v18");                          \
    return result;                                                      \
  }
16140 __LD3_LANE_FUNC (int8x8x3_t
, uint8_t, 8b
, b
, s8
,)
16141 __LD3_LANE_FUNC (float32x2x3_t
, float32_t
, 2s
, s
, f32
,)
16142 __LD3_LANE_FUNC (float64x1x3_t
, float64_t
, 1d
, d
, f64
,)
16143 __LD3_LANE_FUNC (poly8x8x3_t
, poly8_t
, 8b
, b
, p8
,)
16144 __LD3_LANE_FUNC (poly16x4x3_t
, poly16_t
, 4h
, h
, p16
,)
16145 __LD3_LANE_FUNC (int16x4x3_t
, int16_t, 4h
, h
, s16
,)
16146 __LD3_LANE_FUNC (int32x2x3_t
, int32_t, 2s
, s
, s32
,)
16147 __LD3_LANE_FUNC (int64x1x3_t
, int64_t, 1d
, d
, s64
,)
16148 __LD3_LANE_FUNC (uint8x8x3_t
, uint8_t, 8b
, b
, u8
,)
16149 __LD3_LANE_FUNC (uint16x4x3_t
, uint16_t, 4h
, h
, u16
,)
16150 __LD3_LANE_FUNC (uint32x2x3_t
, uint32_t, 2s
, s
, u32
,)
16151 __LD3_LANE_FUNC (uint64x1x3_t
, uint64_t, 1d
, d
, u64
,)
16152 __LD3_LANE_FUNC (float32x4x3_t
, float32_t
, 4s
, s
, f32
, q
)
16153 __LD3_LANE_FUNC (float64x2x3_t
, float64_t
, 2d
, d
, f64
, q
)
16154 __LD3_LANE_FUNC (poly8x16x3_t
, poly8_t
, 16b
, b
, p8
, q
)
16155 __LD3_LANE_FUNC (poly16x8x3_t
, poly16_t
, 8h
, h
, p16
, q
)
16156 __LD3_LANE_FUNC (int8x16x3_t
, int8_t, 16b
, b
, s8
, q
)
16157 __LD3_LANE_FUNC (int16x8x3_t
, int16_t, 8h
, h
, s16
, q
)
16158 __LD3_LANE_FUNC (int32x4x3_t
, int32_t, 4s
, s
, s32
, q
)
16159 __LD3_LANE_FUNC (int64x2x3_t
, int64_t, 2d
, d
, s64
, q
)
16160 __LD3_LANE_FUNC (uint8x16x3_t
, uint8_t, 16b
, b
, u8
, q
)
16161 __LD3_LANE_FUNC (uint16x8x3_t
, uint16_t, 8h
, h
, u16
, q
)
16162 __LD3_LANE_FUNC (uint32x4x3_t
, uint32_t, 4s
, s
, u32
, q
)
16163 __LD3_LANE_FUNC (uint64x2x3_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vld4[q]_dup_<funcsuffix> (const ptrtype *ptr).
   LD4R loads one 4-element structure from *PTR and replicates it across
   v16-v19; ST1 then writes the four registers into the returned RETTYPE.
   STRUCTTYPE is used only to give the memory input operand its extent.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,                       \
                    regsuffix, funcsuffix, Q)                           \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)                 \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"   \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(*(const structtype *)ptr)                            \
             : "memory", "v16", "v17", "v18", "v19");                   \
    return result;                                                      \
  }
16180 __LD4R_FUNC (float32x2x4_t
, float32x4_t
, float32_t
, 2s
, f32
,)
16181 __LD4R_FUNC (float64x1x4_t
, float64x4_t
, float64_t
, 1d
, f64
,)
16182 __LD4R_FUNC (poly8x8x4_t
, poly8x4_t
, poly8_t
, 8b
, p8
,)
16183 __LD4R_FUNC (poly16x4x4_t
, poly16x4_t
, poly16_t
, 4h
, p16
,)
16184 __LD4R_FUNC (int8x8x4_t
, int8x4_t
, int8_t, 8b
, s8
,)
16185 __LD4R_FUNC (int16x4x4_t
, int16x4_t
, int16_t, 4h
, s16
,)
16186 __LD4R_FUNC (int32x2x4_t
, int32x4_t
, int32_t, 2s
, s32
,)
16187 __LD4R_FUNC (int64x1x4_t
, int64x4_t
, int64_t, 1d
, s64
,)
16188 __LD4R_FUNC (uint8x8x4_t
, uint8x4_t
, uint8_t, 8b
, u8
,)
16189 __LD4R_FUNC (uint16x4x4_t
, uint16x4_t
, uint16_t, 4h
, u16
,)
16190 __LD4R_FUNC (uint32x2x4_t
, uint32x4_t
, uint32_t, 2s
, u32
,)
16191 __LD4R_FUNC (uint64x1x4_t
, uint64x4_t
, uint64_t, 1d
, u64
,)
16192 __LD4R_FUNC (float32x4x4_t
, float32x4_t
, float32_t
, 4s
, f32
, q
)
16193 __LD4R_FUNC (float64x2x4_t
, float64x4_t
, float64_t
, 2d
, f64
, q
)
16194 __LD4R_FUNC (poly8x16x4_t
, poly8x4_t
, poly8_t
, 16b
, p8
, q
)
16195 __LD4R_FUNC (poly16x8x4_t
, poly16x4_t
, poly16_t
, 8h
, p16
, q
)
16196 __LD4R_FUNC (int8x16x4_t
, int8x4_t
, int8_t, 16b
, s8
, q
)
16197 __LD4R_FUNC (int16x8x4_t
, int16x4_t
, int16_t, 8h
, s16
, q
)
16198 __LD4R_FUNC (int32x4x4_t
, int32x4_t
, int32_t, 4s
, s32
, q
)
16199 __LD4R_FUNC (int64x2x4_t
, int64x4_t
, int64_t, 2d
, s64
, q
)
16200 __LD4R_FUNC (uint8x16x4_t
, uint8x4_t
, uint8_t, 16b
, u8
, q
)
16201 __LD4R_FUNC (uint16x8x4_t
, uint16x4_t
, uint16_t, 8h
, u16
, q
)
16202 __LD4R_FUNC (uint32x4x4_t
, uint32x4_t
, uint32_t, 4s
, u32
, q
)
16203 __LD4R_FUNC (uint64x2x4_t
, uint64x4_t
, uint64_t, 2d
, u64
, q
)
/* Define vld4[q]_lane_<funcsuffix> (const ptrtype *ptr, rettype b,
   const int c).
   LD1 preloads v16-v19 from B, LD4 then overwrites lane C of each
   register from *PTR, and ST1 writes the registers into the result.
   C is passed with an "i" constraint, so it must be an immediate.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,                    \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     rettype b, const int c)            \
  {                                                                     \
    rettype result;                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"    \
             "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"  \
             "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"    \
             : "=Q"(result)                                             \
             : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)               \
             : "memory", "v16", "v17", "v18", "v19");                   \
    return result;                                                      \
  }
16222 __LD4_LANE_FUNC (int8x8x4_t
, uint8_t, 8b
, b
, s8
,)
16223 __LD4_LANE_FUNC (float32x2x4_t
, float32_t
, 2s
, s
, f32
,)
16224 __LD4_LANE_FUNC (float64x1x4_t
, float64_t
, 1d
, d
, f64
,)
16225 __LD4_LANE_FUNC (poly8x8x4_t
, poly8_t
, 8b
, b
, p8
,)
16226 __LD4_LANE_FUNC (poly16x4x4_t
, poly16_t
, 4h
, h
, p16
,)
16227 __LD4_LANE_FUNC (int16x4x4_t
, int16_t, 4h
, h
, s16
,)
16228 __LD4_LANE_FUNC (int32x2x4_t
, int32_t, 2s
, s
, s32
,)
16229 __LD4_LANE_FUNC (int64x1x4_t
, int64_t, 1d
, d
, s64
,)
16230 __LD4_LANE_FUNC (uint8x8x4_t
, uint8_t, 8b
, b
, u8
,)
16231 __LD4_LANE_FUNC (uint16x4x4_t
, uint16_t, 4h
, h
, u16
,)
16232 __LD4_LANE_FUNC (uint32x2x4_t
, uint32_t, 2s
, s
, u32
,)
16233 __LD4_LANE_FUNC (uint64x1x4_t
, uint64_t, 1d
, d
, u64
,)
16234 __LD4_LANE_FUNC (float32x4x4_t
, float32_t
, 4s
, s
, f32
, q
)
16235 __LD4_LANE_FUNC (float64x2x4_t
, float64_t
, 2d
, d
, f64
, q
)
16236 __LD4_LANE_FUNC (poly8x16x4_t
, poly8_t
, 16b
, b
, p8
, q
)
16237 __LD4_LANE_FUNC (poly16x8x4_t
, poly16_t
, 8h
, h
, p16
, q
)
16238 __LD4_LANE_FUNC (int8x16x4_t
, int8_t, 16b
, b
, s8
, q
)
16239 __LD4_LANE_FUNC (int16x8x4_t
, int16_t, 8h
, h
, s16
, q
)
16240 __LD4_LANE_FUNC (int32x4x4_t
, int32_t, 4s
, s
, s32
, q
)
16241 __LD4_LANE_FUNC (int64x2x4_t
, int64_t, 2d
, d
, s64
, q
)
16242 __LD4_LANE_FUNC (uint8x16x4_t
, uint8_t, 16b
, b
, u8
, q
)
16243 __LD4_LANE_FUNC (uint16x8x4_t
, uint16_t, 8h
, h
, u16
, q
)
16244 __LD4_LANE_FUNC (uint32x4x4_t
, uint32_t, 4s
, s
, u32
, q
)
16245 __LD4_LANE_FUNC (uint64x2x4_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst2[q]_lane_<funcsuffix> (const ptrtype *ptr, intype b,
   const int c).
   LD1 brings the two vectors of B into v16/v17, then ST2 stores lane C
   of each register to *PTR.  C must be an immediate ("i" constraint).
   NOTE(review): PTR is declared const although it is the asm's "=Q"
   output (the const is cast away); the signature is kept unchanged so
   existing callers still compile.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,                     \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline void                                    \
  __attribute__ ((__always_inline__))                                   \
  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     intype b, const int c)             \
  {                                                                     \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"     \
             "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"   \
             : "=Q"(*(intype *) ptr)                                    \
             : "Q"(b), "i"(c)                                           \
             : "memory", "v16", "v17");                                 \
  }
16261 __ST2_LANE_FUNC (int8x8x2_t
, int8_t, 8b
, b
, s8
,)
16262 __ST2_LANE_FUNC (float32x2x2_t
, float32_t
, 2s
, s
, f32
,)
16263 __ST2_LANE_FUNC (float64x1x2_t
, float64_t
, 1d
, d
, f64
,)
16264 __ST2_LANE_FUNC (poly8x8x2_t
, poly8_t
, 8b
, b
, p8
,)
16265 __ST2_LANE_FUNC (poly16x4x2_t
, poly16_t
, 4h
, h
, p16
,)
16266 __ST2_LANE_FUNC (int16x4x2_t
, int16_t, 4h
, h
, s16
,)
16267 __ST2_LANE_FUNC (int32x2x2_t
, int32_t, 2s
, s
, s32
,)
16268 __ST2_LANE_FUNC (int64x1x2_t
, int64_t, 1d
, d
, s64
,)
16269 __ST2_LANE_FUNC (uint8x8x2_t
, uint8_t, 8b
, b
, u8
,)
16270 __ST2_LANE_FUNC (uint16x4x2_t
, uint16_t, 4h
, h
, u16
,)
16271 __ST2_LANE_FUNC (uint32x2x2_t
, uint32_t, 2s
, s
, u32
,)
16272 __ST2_LANE_FUNC (uint64x1x2_t
, uint64_t, 1d
, d
, u64
,)
16273 __ST2_LANE_FUNC (float32x4x2_t
, float32_t
, 4s
, s
, f32
, q
)
16274 __ST2_LANE_FUNC (float64x2x2_t
, float64_t
, 2d
, d
, f64
, q
)
16275 __ST2_LANE_FUNC (poly8x16x2_t
, poly8_t
, 16b
, b
, p8
, q
)
16276 __ST2_LANE_FUNC (poly16x8x2_t
, poly16_t
, 8h
, h
, p16
, q
)
16277 __ST2_LANE_FUNC (int8x16x2_t
, int8_t, 16b
, b
, s8
, q
)
16278 __ST2_LANE_FUNC (int16x8x2_t
, int16_t, 8h
, h
, s16
, q
)
16279 __ST2_LANE_FUNC (int32x4x2_t
, int32_t, 4s
, s
, s32
, q
)
16280 __ST2_LANE_FUNC (int64x2x2_t
, int64_t, 2d
, d
, s64
, q
)
16281 __ST2_LANE_FUNC (uint8x16x2_t
, uint8_t, 16b
, b
, u8
, q
)
16282 __ST2_LANE_FUNC (uint16x8x2_t
, uint16_t, 8h
, h
, u16
, q
)
16283 __ST2_LANE_FUNC (uint32x4x2_t
, uint32_t, 4s
, s
, u32
, q
)
16284 __ST2_LANE_FUNC (uint64x2x2_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst3[q]_lane_<funcsuffix> (const ptrtype *ptr, intype b,
   const int c).
   LD1 brings the three vectors of B into v16-v18, then ST3 stores lane C
   of each register to *PTR.  C must be an immediate ("i" constraint).
   NOTE(review): PTR is declared const although it is the asm's "=Q"
   output (the const is cast away); the signature is kept unchanged so
   existing callers still compile.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,                     \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline void                                    \
  __attribute__ ((__always_inline__))                                   \
  vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     intype b, const int c)             \
  {                                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"    \
             "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"  \
             : "=Q"(*(intype *) ptr)                                    \
             : "Q"(b), "i"(c)                                           \
             : "memory", "v16", "v17", "v18");                          \
  }
16300 __ST3_LANE_FUNC (int8x8x3_t
, int8_t, 8b
, b
, s8
,)
16301 __ST3_LANE_FUNC (float32x2x3_t
, float32_t
, 2s
, s
, f32
,)
16302 __ST3_LANE_FUNC (float64x1x3_t
, float64_t
, 1d
, d
, f64
,)
16303 __ST3_LANE_FUNC (poly8x8x3_t
, poly8_t
, 8b
, b
, p8
,)
16304 __ST3_LANE_FUNC (poly16x4x3_t
, poly16_t
, 4h
, h
, p16
,)
16305 __ST3_LANE_FUNC (int16x4x3_t
, int16_t, 4h
, h
, s16
,)
16306 __ST3_LANE_FUNC (int32x2x3_t
, int32_t, 2s
, s
, s32
,)
16307 __ST3_LANE_FUNC (int64x1x3_t
, int64_t, 1d
, d
, s64
,)
16308 __ST3_LANE_FUNC (uint8x8x3_t
, uint8_t, 8b
, b
, u8
,)
16309 __ST3_LANE_FUNC (uint16x4x3_t
, uint16_t, 4h
, h
, u16
,)
16310 __ST3_LANE_FUNC (uint32x2x3_t
, uint32_t, 2s
, s
, u32
,)
16311 __ST3_LANE_FUNC (uint64x1x3_t
, uint64_t, 1d
, d
, u64
,)
16312 __ST3_LANE_FUNC (float32x4x3_t
, float32_t
, 4s
, s
, f32
, q
)
16313 __ST3_LANE_FUNC (float64x2x3_t
, float64_t
, 2d
, d
, f64
, q
)
16314 __ST3_LANE_FUNC (poly8x16x3_t
, poly8_t
, 16b
, b
, p8
, q
)
16315 __ST3_LANE_FUNC (poly16x8x3_t
, poly16_t
, 8h
, h
, p16
, q
)
16316 __ST3_LANE_FUNC (int8x16x3_t
, int8_t, 16b
, b
, s8
, q
)
16317 __ST3_LANE_FUNC (int16x8x3_t
, int16_t, 8h
, h
, s16
, q
)
16318 __ST3_LANE_FUNC (int32x4x3_t
, int32_t, 4s
, s
, s32
, q
)
16319 __ST3_LANE_FUNC (int64x2x3_t
, int64_t, 2d
, d
, s64
, q
)
16320 __ST3_LANE_FUNC (uint8x16x3_t
, uint8_t, 16b
, b
, u8
, q
)
16321 __ST3_LANE_FUNC (uint16x8x3_t
, uint16_t, 8h
, h
, u16
, q
)
16322 __ST3_LANE_FUNC (uint32x4x3_t
, uint32_t, 4s
, s
, u32
, q
)
16323 __ST3_LANE_FUNC (uint64x2x3_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst4[q]_lane_<funcsuffix> (const ptrtype *ptr, intype b,
   const int c).
   LD1 brings the four vectors of B into v16-v19, then ST4 stores lane C
   of each register to *PTR.  C must be an immediate ("i" constraint).
   NOTE(review): PTR is declared const although it is the asm's "=Q"
   output (the const is cast away); the signature is kept unchanged so
   existing callers still compile.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,                     \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline void                                    \
  __attribute__ ((__always_inline__))                                   \
  vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     intype b, const int c)             \
  {                                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"    \
             "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"  \
             : "=Q"(*(intype *) ptr)                                    \
             : "Q"(b), "i"(c)                                           \
             : "memory", "v16", "v17", "v18", "v19");                   \
  }
16339 __ST4_LANE_FUNC (int8x8x4_t
, int8_t, 8b
, b
, s8
,)
16340 __ST4_LANE_FUNC (float32x2x4_t
, float32_t
, 2s
, s
, f32
,)
16341 __ST4_LANE_FUNC (float64x1x4_t
, float64_t
, 1d
, d
, f64
,)
16342 __ST4_LANE_FUNC (poly8x8x4_t
, poly8_t
, 8b
, b
, p8
,)
16343 __ST4_LANE_FUNC (poly16x4x4_t
, poly16_t
, 4h
, h
, p16
,)
16344 __ST4_LANE_FUNC (int16x4x4_t
, int16_t, 4h
, h
, s16
,)
16345 __ST4_LANE_FUNC (int32x2x4_t
, int32_t, 2s
, s
, s32
,)
16346 __ST4_LANE_FUNC (int64x1x4_t
, int64_t, 1d
, d
, s64
,)
16347 __ST4_LANE_FUNC (uint8x8x4_t
, uint8_t, 8b
, b
, u8
,)
16348 __ST4_LANE_FUNC (uint16x4x4_t
, uint16_t, 4h
, h
, u16
,)
16349 __ST4_LANE_FUNC (uint32x2x4_t
, uint32_t, 2s
, s
, u32
,)
16350 __ST4_LANE_FUNC (uint64x1x4_t
, uint64_t, 1d
, d
, u64
,)
16351 __ST4_LANE_FUNC (float32x4x4_t
, float32_t
, 4s
, s
, f32
, q
)
16352 __ST4_LANE_FUNC (float64x2x4_t
, float64_t
, 2d
, d
, f64
, q
)
16353 __ST4_LANE_FUNC (poly8x16x4_t
, poly8_t
, 16b
, b
, p8
, q
)
16354 __ST4_LANE_FUNC (poly16x8x4_t
, poly16_t
, 8h
, h
, p16
, q
)
16355 __ST4_LANE_FUNC (int8x16x4_t
, int8_t, 16b
, b
, s8
, q
)
16356 __ST4_LANE_FUNC (int16x8x4_t
, int16_t, 8h
, h
, s16
, q
)
16357 __ST4_LANE_FUNC (int32x4x4_t
, int32_t, 4s
, s
, s32
, q
)
16358 __ST4_LANE_FUNC (int64x2x4_t
, int64_t, 2d
, d
, s64
, q
)
16359 __ST4_LANE_FUNC (uint8x16x4_t
, uint8_t, 16b
, b
, u8
, q
)
16360 __ST4_LANE_FUNC (uint16x8x4_t
, uint16_t, 8h
, h
, u16
, q
)
16361 __ST4_LANE_FUNC (uint32x4x4_t
, uint32_t, 4s
, s
, u32
, q
)
16362 __ST4_LANE_FUNC (uint64x2x4_t
, uint64_t, 2d
, d
, u64
, q
)
16364 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
16365 vaddlv_s32 (int32x2_t a
)
16368 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result
) : "w"(a
) : );
16372 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
16373 vaddlv_u32 (uint32x2_t a
)
16376 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result
) : "w"(a
) : );
16380 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
16381 vpaddd_s64 (int64x2_t __a
)
16383 return __builtin_aarch64_addpdi (__a
);
16386 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
16387 vqdmulh_laneq_s16 (int16x4_t __a
, int16x8_t __b
, const int __c
)
16389 return __builtin_aarch64_sqdmulh_laneqv4hi (__a
, __b
, __c
);
16392 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
16393 vqdmulh_laneq_s32 (int32x2_t __a
, int32x4_t __b
, const int __c
)
16395 return __builtin_aarch64_sqdmulh_laneqv2si (__a
, __b
, __c
);
16398 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
16399 vqdmulhq_laneq_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
16401 return __builtin_aarch64_sqdmulh_laneqv8hi (__a
, __b
, __c
);
16404 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
16405 vqdmulhq_laneq_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
16407 return __builtin_aarch64_sqdmulh_laneqv4si (__a
, __b
, __c
);
16410 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
16411 vqrdmulh_laneq_s16 (int16x4_t __a
, int16x8_t __b
, const int __c
)
16413 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a
, __b
, __c
);
16416 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
16417 vqrdmulh_laneq_s32 (int32x2_t __a
, int32x4_t __b
, const int __c
)
16419 return __builtin_aarch64_sqrdmulh_laneqv2si (__a
, __b
, __c
);
16422 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
16423 vqrdmulhq_laneq_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
16425 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a
, __b
, __c
);
16428 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
16429 vqrdmulhq_laneq_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
16431 return __builtin_aarch64_sqrdmulh_laneqv4si (__a
, __b
, __c
);
/* Table intrinsics. */
16436 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16437 vqtbl1_p8 (poly8x16_t a
, uint8x8_t b
)
16440 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16443 : /* No clobbers */);
16447 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16448 vqtbl1_s8 (int8x16_t a
, int8x8_t b
)
16451 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16454 : /* No clobbers */);
16458 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16459 vqtbl1_u8 (uint8x16_t a
, uint8x8_t b
)
16462 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16465 : /* No clobbers */);
16469 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16470 vqtbl1q_p8 (poly8x16_t a
, uint8x16_t b
)
16473 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16476 : /* No clobbers */);
16480 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16481 vqtbl1q_s8 (int8x16_t a
, int8x16_t b
)
16484 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16487 : /* No clobbers */);
16491 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16492 vqtbl1q_u8 (uint8x16_t a
, uint8x16_t b
)
16495 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16498 : /* No clobbers */);
16502 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16503 vqtbl2_s8 (int8x16x2_t tab
, int8x8_t idx
)
16506 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16507 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16510 :"memory", "v16", "v17");
16514 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16515 vqtbl2_u8 (uint8x16x2_t tab
, uint8x8_t idx
)
16518 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16519 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16522 :"memory", "v16", "v17");
16526 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16527 vqtbl2_p8 (poly8x16x2_t tab
, uint8x8_t idx
)
16530 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16531 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16534 :"memory", "v16", "v17");
16538 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16539 vqtbl2q_s8 (int8x16x2_t tab
, int8x16_t idx
)
16542 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16543 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16546 :"memory", "v16", "v17");
16550 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16551 vqtbl2q_u8 (uint8x16x2_t tab
, uint8x16_t idx
)
16554 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16555 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16558 :"memory", "v16", "v17");
16562 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16563 vqtbl2q_p8 (poly8x16x2_t tab
, uint8x16_t idx
)
16566 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16567 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16570 :"memory", "v16", "v17");
16574 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16575 vqtbl3_s8 (int8x16x3_t tab
, int8x8_t idx
)
16578 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16579 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16582 :"memory", "v16", "v17", "v18");
16586 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16587 vqtbl3_u8 (uint8x16x3_t tab
, uint8x8_t idx
)
16590 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16591 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16594 :"memory", "v16", "v17", "v18");
16598 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16599 vqtbl3_p8 (poly8x16x3_t tab
, uint8x8_t idx
)
16602 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16603 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16606 :"memory", "v16", "v17", "v18");
16610 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16611 vqtbl3q_s8 (int8x16x3_t tab
, int8x16_t idx
)
16614 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16615 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16618 :"memory", "v16", "v17", "v18");
16622 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16623 vqtbl3q_u8 (uint8x16x3_t tab
, uint8x16_t idx
)
16626 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16627 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16630 :"memory", "v16", "v17", "v18");
16634 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16635 vqtbl3q_p8 (poly8x16x3_t tab
, uint8x16_t idx
)
16638 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16639 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16642 :"memory", "v16", "v17", "v18");
16646 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16647 vqtbl4_s8 (int8x16x4_t tab
, int8x8_t idx
)
16650 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16651 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16654 :"memory", "v16", "v17", "v18", "v19");
16658 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16659 vqtbl4_u8 (uint8x16x4_t tab
, uint8x8_t idx
)
16662 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16663 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16666 :"memory", "v16", "v17", "v18", "v19");
16670 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16671 vqtbl4_p8 (poly8x16x4_t tab
, uint8x8_t idx
)
16674 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16675 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16678 :"memory", "v16", "v17", "v18", "v19");
16683 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16684 vqtbl4q_s8 (int8x16x4_t tab
, int8x16_t idx
)
16687 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16688 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16691 :"memory", "v16", "v17", "v18", "v19");
16695 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16696 vqtbl4q_u8 (uint8x16x4_t tab
, uint8x16_t idx
)
16699 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16700 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16703 :"memory", "v16", "v17", "v18", "v19");
16707 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16708 vqtbl4q_p8 (poly8x16x4_t tab
, uint8x16_t idx
)
16711 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16712 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16715 :"memory", "v16", "v17", "v18", "v19");
16720 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16721 vqtbx1_s8 (int8x8_t r
, int8x16_t tab
, int8x8_t idx
)
16723 int8x8_t result
= r
;
16724 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16726 : "w"(tab
), "w"(idx
)
16727 : /* No clobbers */);
16731 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16732 vqtbx1_u8 (uint8x8_t r
, uint8x16_t tab
, uint8x8_t idx
)
16734 uint8x8_t result
= r
;
16735 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16737 : "w"(tab
), "w"(idx
)
16738 : /* No clobbers */);
16742 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16743 vqtbx1_p8 (poly8x8_t r
, poly8x16_t tab
, uint8x8_t idx
)
16745 poly8x8_t result
= r
;
16746 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16748 : "w"(tab
), "w"(idx
)
16749 : /* No clobbers */);
16753 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16754 vqtbx1q_s8 (int8x16_t r
, int8x16_t tab
, int8x16_t idx
)
16756 int8x16_t result
= r
;
16757 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16759 : "w"(tab
), "w"(idx
)
16760 : /* No clobbers */);
16764 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16765 vqtbx1q_u8 (uint8x16_t r
, uint8x16_t tab
, uint8x16_t idx
)
16767 uint8x16_t result
= r
;
16768 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16770 : "w"(tab
), "w"(idx
)
16771 : /* No clobbers */);
16775 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16776 vqtbx1q_p8 (poly8x16_t r
, poly8x16_t tab
, uint8x16_t idx
)
16778 poly8x16_t result
= r
;
16779 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16781 : "w"(tab
), "w"(idx
)
16782 : /* No clobbers */);
16786 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16787 vqtbx2_s8 (int8x8_t r
, int8x16x2_t tab
, int8x8_t idx
)
16789 int8x8_t result
= r
;
16790 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16791 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16794 :"memory", "v16", "v17");
16798 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16799 vqtbx2_u8 (uint8x8_t r
, uint8x16x2_t tab
, uint8x8_t idx
)
16801 uint8x8_t result
= r
;
16802 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16803 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16806 :"memory", "v16", "v17");
16810 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16811 vqtbx2_p8 (poly8x8_t r
, poly8x16x2_t tab
, uint8x8_t idx
)
16813 poly8x8_t result
= r
;
16814 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16815 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16818 :"memory", "v16", "v17");
16823 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16824 vqtbx2q_s8 (int8x16_t r
, int8x16x2_t tab
, int8x16_t idx
)
16826 int8x16_t result
= r
;
16827 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16828 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16831 :"memory", "v16", "v17");
16835 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16836 vqtbx2q_u8 (uint8x16_t r
, uint8x16x2_t tab
, uint8x16_t idx
)
16838 uint8x16_t result
= r
;
16839 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16840 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16843 :"memory", "v16", "v17");
16847 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16848 vqtbx2q_p8 (poly8x16_t r
, poly8x16x2_t tab
, uint8x16_t idx
)
16850 poly8x16_t result
= r
;
16851 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16852 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16855 :"memory", "v16", "v17");
16860 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16861 vqtbx3_s8 (int8x8_t r
, int8x16x3_t tab
, int8x8_t idx
)
16863 int8x8_t result
= r
;
16864 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16865 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16868 :"memory", "v16", "v17", "v18");
16872 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16873 vqtbx3_u8 (uint8x8_t r
, uint8x16x3_t tab
, uint8x8_t idx
)
16875 uint8x8_t result
= r
;
16876 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16877 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16880 :"memory", "v16", "v17", "v18");
16884 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16885 vqtbx3_p8 (poly8x8_t r
, poly8x16x3_t tab
, uint8x8_t idx
)
16887 poly8x8_t result
= r
;
16888 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16889 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16892 :"memory", "v16", "v17", "v18");
16897 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16898 vqtbx3q_s8 (int8x16_t r
, int8x16x3_t tab
, int8x16_t idx
)
16900 int8x16_t result
= r
;
16901 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16902 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16905 :"memory", "v16", "v17", "v18");
16909 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16910 vqtbx3q_u8 (uint8x16_t r
, uint8x16x3_t tab
, uint8x16_t idx
)
16912 uint8x16_t result
= r
;
16913 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16914 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16917 :"memory", "v16", "v17", "v18");
16921 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16922 vqtbx3q_p8 (poly8x16_t r
, poly8x16x3_t tab
, uint8x16_t idx
)
16924 poly8x16_t result
= r
;
16925 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16926 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16929 :"memory", "v16", "v17", "v18");
16934 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16935 vqtbx4_s8 (int8x8_t r
, int8x16x4_t tab
, int8x8_t idx
)
16937 int8x8_t result
= r
;
16938 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16939 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16942 :"memory", "v16", "v17", "v18", "v19");
16946 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16947 vqtbx4_u8 (uint8x8_t r
, uint8x16x4_t tab
, uint8x8_t idx
)
16949 uint8x8_t result
= r
;
16950 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16951 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16954 :"memory", "v16", "v17", "v18", "v19");
16958 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16959 vqtbx4_p8 (poly8x8_t r
, poly8x16x4_t tab
, uint8x8_t idx
)
16961 poly8x8_t result
= r
;
16962 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16963 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16966 :"memory", "v16", "v17", "v18", "v19");
16971 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16972 vqtbx4q_s8 (int8x16_t r
, int8x16x4_t tab
, int8x16_t idx
)
16974 int8x16_t result
= r
;
16975 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16976 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16979 :"memory", "v16", "v17", "v18", "v19");
16983 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16984 vqtbx4q_u8 (uint8x16_t r
, uint8x16x4_t tab
, uint8x16_t idx
)
16986 uint8x16_t result
= r
;
16987 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16988 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16991 :"memory", "v16", "v17", "v18", "v19");
16995 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16996 vqtbx4q_p8 (poly8x16_t r
, poly8x16x4_t tab
, uint8x16_t idx
)
16998 poly8x16_t result
= r
;
16999 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
17000 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
17003 :"memory", "v16", "v17", "v18", "v19");
/* V7 legacy table intrinsics. */
17009 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17010 vtbl1_s8 (int8x8_t tab
, int8x8_t idx
)
17013 int8x16_t temp
= vcombine_s8 (tab
, vcreate_s8 (UINT64_C (0x0)));
17014 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17016 : "w"(temp
), "w"(idx
)
17017 : /* No clobbers */);
17021 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17022 vtbl1_u8 (uint8x8_t tab
, uint8x8_t idx
)
17025 uint8x16_t temp
= vcombine_u8 (tab
, vcreate_u8 (UINT64_C (0x0)));
17026 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17028 : "w"(temp
), "w"(idx
)
17029 : /* No clobbers */);
17033 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17034 vtbl1_p8 (poly8x8_t tab
, uint8x8_t idx
)
17037 poly8x16_t temp
= vcombine_p8 (tab
, vcreate_p8 (UINT64_C (0x0)));
17038 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17040 : "w"(temp
), "w"(idx
)
17041 : /* No clobbers */);
17045 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17046 vtbl2_s8 (int8x8x2_t tab
, int8x8_t idx
)
17049 int8x16_t temp
= vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17050 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17052 : "w"(temp
), "w"(idx
)
17053 : /* No clobbers */);
17057 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17058 vtbl2_u8 (uint8x8x2_t tab
, uint8x8_t idx
)
17061 uint8x16_t temp
= vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17062 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17064 : "w"(temp
), "w"(idx
)
17065 : /* No clobbers */);
17069 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17070 vtbl2_p8 (poly8x8x2_t tab
, uint8x8_t idx
)
17073 poly8x16_t temp
= vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17074 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
17076 : "w"(temp
), "w"(idx
)
17077 : /* No clobbers */);
17081 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17082 vtbl3_s8 (int8x8x3_t tab
, int8x8_t idx
)
17086 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17087 temp
.val
[1] = vcombine_s8 (tab
.val
[2], vcreate_s8 (UINT64_C (0x0)));
17088 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17089 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17091 : "Q"(temp
), "w"(idx
)
17092 : "v16", "v17", "memory");
17096 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17097 vtbl3_u8 (uint8x8x3_t tab
, uint8x8_t idx
)
17101 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17102 temp
.val
[1] = vcombine_u8 (tab
.val
[2], vcreate_u8 (UINT64_C (0x0)));
17103 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17104 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17106 : "Q"(temp
), "w"(idx
)
17107 : "v16", "v17", "memory");
17111 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17112 vtbl3_p8 (poly8x8x3_t tab
, uint8x8_t idx
)
17116 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17117 temp
.val
[1] = vcombine_p8 (tab
.val
[2], vcreate_p8 (UINT64_C (0x0)));
17118 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17119 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17121 : "Q"(temp
), "w"(idx
)
17122 : "v16", "v17", "memory");
17126 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17127 vtbl4_s8 (int8x8x4_t tab
, int8x8_t idx
)
17131 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17132 temp
.val
[1] = vcombine_s8 (tab
.val
[2], tab
.val
[3]);
17133 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17134 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17136 : "Q"(temp
), "w"(idx
)
17137 : "v16", "v17", "memory");
17141 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17142 vtbl4_u8 (uint8x8x4_t tab
, uint8x8_t idx
)
17146 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17147 temp
.val
[1] = vcombine_u8 (tab
.val
[2], tab
.val
[3]);
17148 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17149 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17151 : "Q"(temp
), "w"(idx
)
17152 : "v16", "v17", "memory");
17156 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17157 vtbl4_p8 (poly8x8x4_t tab
, uint8x8_t idx
)
17161 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17162 temp
.val
[1] = vcombine_p8 (tab
.val
[2], tab
.val
[3]);
17163 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17164 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17166 : "Q"(temp
), "w"(idx
)
17167 : "v16", "v17", "memory");
17171 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17172 vtbx1_s8 (int8x8_t r
, int8x8_t tab
, int8x8_t idx
)
17176 int8x16_t temp
= vcombine_s8 (tab
, vcreate_s8 (UINT64_C (0x0)));
17177 __asm__ ("movi %0.8b, 8\n\t"
17178 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17179 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
17180 "bsl %0.8b, %4.8b, %1.8b\n\t"
17181 : "+w"(result
), "=w"(tmp1
)
17182 : "w"(temp
), "w"(idx
), "w"(r
)
17183 : /* No clobbers */);
17187 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17188 vtbx1_u8 (uint8x8_t r
, uint8x8_t tab
, uint8x8_t idx
)
17192 uint8x16_t temp
= vcombine_u8 (tab
, vcreate_u8 (UINT64_C (0x0)));
17193 __asm__ ("movi %0.8b, 8\n\t"
17194 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17195 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
17196 "bsl %0.8b, %4.8b, %1.8b\n\t"
17197 : "+w"(result
), "=w"(tmp1
)
17198 : "w"(temp
), "w"(idx
), "w"(r
)
17199 : /* No clobbers */);
17203 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17204 vtbx1_p8 (poly8x8_t r
, poly8x8_t tab
, uint8x8_t idx
)
17208 poly8x16_t temp
= vcombine_p8 (tab
, vcreate_p8 (UINT64_C (0x0)));
17209 __asm__ ("movi %0.8b, 8\n\t"
17210 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17211 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
17212 "bsl %0.8b, %4.8b, %1.8b\n\t"
17213 : "+w"(result
), "=w"(tmp1
)
17214 : "w"(temp
), "w"(idx
), "w"(r
)
17215 : /* No clobbers */);
17219 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17220 vtbx2_s8 (int8x8_t r
, int8x8x2_t tab
, int8x8_t idx
)
17222 int8x8_t result
= r
;
17223 int8x16_t temp
= vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17224 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
17226 : "w"(temp
), "w"(idx
)
17227 : /* No clobbers */);
17231 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17232 vtbx2_u8 (uint8x8_t r
, uint8x8x2_t tab
, uint8x8_t idx
)
17234 uint8x8_t result
= r
;
17235 uint8x16_t temp
= vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17236 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
17238 : "w"(temp
), "w"(idx
)
17239 : /* No clobbers */);
17243 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17244 vtbx2_p8 (poly8x8_t r
, poly8x8x2_t tab
, uint8x8_t idx
)
17246 poly8x8_t result
= r
;
17247 poly8x16_t temp
= vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17248 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
17250 : "w"(temp
), "w"(idx
)
17251 : /* No clobbers */);
17255 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17256 vtbx3_s8 (int8x8_t r
, int8x8x3_t tab
, int8x8_t idx
)
17261 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17262 temp
.val
[1] = vcombine_s8 (tab
.val
[2], vcreate_s8 (UINT64_C (0x0)));
17263 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
17264 "movi %0.8b, 24\n\t"
17265 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17266 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
17267 "bsl %0.8b, %4.8b, %1.8b\n\t"
17268 : "+w"(result
), "=w"(tmp1
)
17269 : "Q"(temp
), "w"(idx
), "w"(r
)
17270 : "v16", "v17", "memory");
17274 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17275 vtbx3_u8 (uint8x8_t r
, uint8x8x3_t tab
, uint8x8_t idx
)
17280 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17281 temp
.val
[1] = vcombine_u8 (tab
.val
[2], vcreate_u8 (UINT64_C (0x0)));
17282 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
17283 "movi %0.8b, 24\n\t"
17284 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17285 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
17286 "bsl %0.8b, %4.8b, %1.8b\n\t"
17287 : "+w"(result
), "=w"(tmp1
)
17288 : "Q"(temp
), "w"(idx
), "w"(r
)
17289 : "v16", "v17", "memory");
17293 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17294 vtbx3_p8 (poly8x8_t r
, poly8x8x3_t tab
, uint8x8_t idx
)
17299 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17300 temp
.val
[1] = vcombine_p8 (tab
.val
[2], vcreate_p8 (UINT64_C (0x0)));
17301 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
17302 "movi %0.8b, 24\n\t"
17303 "cmhs %0.8b, %3.8b, %0.8b\n\t"
17304 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
17305 "bsl %0.8b, %4.8b, %1.8b\n\t"
17306 : "+w"(result
), "=w"(tmp1
)
17307 : "Q"(temp
), "w"(idx
), "w"(r
)
17308 : "v16", "v17", "memory");
17312 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17313 vtbx4_s8 (int8x8_t r
, int8x8x4_t tab
, int8x8_t idx
)
17315 int8x8_t result
= r
;
17317 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
17318 temp
.val
[1] = vcombine_s8 (tab
.val
[2], tab
.val
[3]);
17319 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17320 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17322 : "Q"(temp
), "w"(idx
)
17323 : "v16", "v17", "memory");
17327 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17328 vtbx4_u8 (uint8x8_t r
, uint8x8x4_t tab
, uint8x8_t idx
)
17330 uint8x8_t result
= r
;
17332 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
17333 temp
.val
[1] = vcombine_u8 (tab
.val
[2], tab
.val
[3]);
17334 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17335 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17337 : "Q"(temp
), "w"(idx
)
17338 : "v16", "v17", "memory");
17342 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17343 vtbx4_p8 (poly8x8_t r
, poly8x8x4_t tab
, uint8x8_t idx
)
17345 poly8x8_t result
= r
;
17347 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
17348 temp
.val
[1] = vcombine_p8 (tab
.val
[2], tab
.val
[3]);
17349 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
17350 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
17352 : "Q"(temp
), "w"(idx
)
17353 : "v16", "v17", "memory");
/* End of temporary inline asm. */
/* Start of optimal implementations in approved order. */
17363 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
17364 vabs_f32 (float32x2_t __a
)
17366 return __builtin_aarch64_absv2sf (__a
);
17369 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
17370 vabs_f64 (float64x1_t __a
)
17372 return __builtin_fabs (__a
);
17375 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17376 vabs_s8 (int8x8_t __a
)
17378 return __builtin_aarch64_absv8qi (__a
);
17381 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17382 vabs_s16 (int16x4_t __a
)
17384 return __builtin_aarch64_absv4hi (__a
);
17387 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17388 vabs_s32 (int32x2_t __a
)
17390 return __builtin_aarch64_absv2si (__a
);
17393 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
17394 vabs_s64 (int64x1_t __a
)
17396 return __builtin_llabs (__a
);
17399 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
17400 vabsq_f32 (float32x4_t __a
)
17402 return __builtin_aarch64_absv4sf (__a
);
17405 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
17406 vabsq_f64 (float64x2_t __a
)
17408 return __builtin_aarch64_absv2df (__a
);
17411 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
17412 vabsq_s8 (int8x16_t __a
)
17414 return __builtin_aarch64_absv16qi (__a
);
17417 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
17418 vabsq_s16 (int16x8_t __a
)
17420 return __builtin_aarch64_absv8hi (__a
);
17423 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
17424 vabsq_s32 (int32x4_t __a
)
17426 return __builtin_aarch64_absv4si (__a
);
17429 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
17430 vabsq_s64 (int64x2_t __a
)
17432 return __builtin_aarch64_absv2di (__a
);
17437 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
17438 vaddd_s64 (int64x1_t __a
, int64x1_t __b
)
17443 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17444 vaddd_u64 (uint64x1_t __a
, uint64x1_t __b
)
17451 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
17452 vaddv_s8 (int8x8_t __a
)
17454 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a
), 0);
17457 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
17458 vaddv_s16 (int16x4_t __a
)
17460 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a
), 0);
17463 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
17464 vaddv_s32 (int32x2_t __a
)
17466 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a
), 0);
17469 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
17470 vaddv_u8 (uint8x8_t __a
)
17472 return vget_lane_u8 ((uint8x8_t
)
17473 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t
) __a
), 0);
17476 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
17477 vaddv_u16 (uint16x4_t __a
)
17479 return vget_lane_u16 ((uint16x4_t
)
17480 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t
) __a
), 0);
17483 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17484 vaddv_u32 (uint32x2_t __a
)
17486 return vget_lane_u32 ((uint32x2_t
)
17487 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t
) __a
), 0);
17490 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
17491 vaddvq_s8 (int8x16_t __a
)
17493 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a
), 0);
17496 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
17497 vaddvq_s16 (int16x8_t __a
)
17499 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a
), 0);
17502 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
17503 vaddvq_s32 (int32x4_t __a
)
17505 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a
), 0);
17508 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
17509 vaddvq_s64 (int64x2_t __a
)
17511 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a
), 0);
17514 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
17515 vaddvq_u8 (uint8x16_t __a
)
17517 return vgetq_lane_u8 ((uint8x16_t
)
17518 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t
) __a
), 0);
17521 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
17522 vaddvq_u16 (uint16x8_t __a
)
17524 return vgetq_lane_u16 ((uint16x8_t
)
17525 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t
) __a
), 0);
17528 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17529 vaddvq_u32 (uint32x4_t __a
)
17531 return vgetq_lane_u32 ((uint32x4_t
)
17532 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t
) __a
), 0);
17535 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17536 vaddvq_u64 (uint64x2_t __a
)
17538 return vgetq_lane_u64 ((uint64x2_t
)
17539 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t
) __a
), 0);
17542 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
17543 vaddv_f32 (float32x2_t __a
)
17545 float32x2_t t
= __builtin_aarch64_reduc_splus_v2sf (__a
);
17546 return vget_lane_f32 (t
, 0);
17549 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
17550 vaddvq_f32 (float32x4_t __a
)
17552 float32x4_t t
= __builtin_aarch64_reduc_splus_v4sf (__a
);
17553 return vgetq_lane_f32 (t
, 0);
17556 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
17557 vaddvq_f64 (float64x2_t __a
)
17559 float64x2_t t
= __builtin_aarch64_reduc_splus_v2df (__a
);
17560 return vgetq_lane_f64 (t
, 0);
17565 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17566 vcages_f32 (float32_t __a
, float32_t __b
)
17568 return __builtin_fabsf (__a
) >= __builtin_fabsf (__b
) ? -1 : 0;
17571 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17572 vcage_f32 (float32x2_t __a
, float32x2_t __b
)
17574 return vabs_f32 (__a
) >= vabs_f32 (__b
);
17577 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17578 vcageq_f32 (float32x4_t __a
, float32x4_t __b
)
17580 return vabsq_f32 (__a
) >= vabsq_f32 (__b
);
17583 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
17584 vcaged_f64 (float64_t __a
, float64_t __b
)
17586 return __builtin_fabs (__a
) >= __builtin_fabs (__b
) ? -1 : 0;
17589 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17590 vcageq_f64 (float64x2_t __a
, float64x2_t __b
)
17592 return vabsq_f64 (__a
) >= vabsq_f64 (__b
);
17597 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17598 vcagts_f32 (float32_t __a
, float32_t __b
)
17600 return __builtin_fabsf (__a
) > __builtin_fabsf (__b
) ? -1 : 0;
17603 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17604 vcagt_f32 (float32x2_t __a
, float32x2_t __b
)
17606 return vabs_f32 (__a
) > vabs_f32 (__b
);
17609 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17610 vcagtq_f32 (float32x4_t __a
, float32x4_t __b
)
17612 return vabsq_f32 (__a
) > vabsq_f32 (__b
);
17615 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
17616 vcagtd_f64 (float64_t __a
, float64_t __b
)
17618 return __builtin_fabs (__a
) > __builtin_fabs (__b
) ? -1 : 0;
17621 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17622 vcagtq_f64 (float64x2_t __a
, float64x2_t __b
)
17624 return vabsq_f64 (__a
) > vabsq_f64 (__b
);
17629 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17630 vcale_f32 (float32x2_t __a
, float32x2_t __b
)
17632 return vabs_f32 (__a
) <= vabs_f32 (__b
);
17635 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17636 vcaleq_f32 (float32x4_t __a
, float32x4_t __b
)
17638 return vabsq_f32 (__a
) <= vabsq_f32 (__b
);
17641 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17642 vcaleq_f64 (float64x2_t __a
, float64x2_t __b
)
17644 return vabsq_f64 (__a
) <= vabsq_f64 (__b
);
17649 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17650 vcalt_f32 (float32x2_t __a
, float32x2_t __b
)
17652 return vabs_f32 (__a
) < vabs_f32 (__b
);
17655 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17656 vcaltq_f32 (float32x4_t __a
, float32x4_t __b
)
17658 return vabsq_f32 (__a
) < vabsq_f32 (__b
);
17661 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17662 vcaltq_f64 (float64x2_t __a
, float64x2_t __b
)
17664 return vabsq_f64 (__a
) < vabsq_f64 (__b
);
17667 /* vceq - vector. */
17669 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17670 vceq_f32 (float32x2_t __a
, float32x2_t __b
)
17672 return (uint32x2_t
) __builtin_aarch64_cmeqv2sf (__a
, __b
);
17675 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17676 vceq_f64 (float64x1_t __a
, float64x1_t __b
)
17678 return __a
== __b
? -1ll : 0ll;
17681 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17682 vceq_p8 (poly8x8_t __a
, poly8x8_t __b
)
17684 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
17688 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17689 vceq_s8 (int8x8_t __a
, int8x8_t __b
)
17691 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi (__a
, __b
);
17694 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17695 vceq_s16 (int16x4_t __a
, int16x4_t __b
)
17697 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi (__a
, __b
);
17700 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17701 vceq_s32 (int32x2_t __a
, int32x2_t __b
)
17703 return (uint32x2_t
) __builtin_aarch64_cmeqv2si (__a
, __b
);
17706 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17707 vceq_s64 (int64x1_t __a
, int64x1_t __b
)
17709 return __a
== __b
? -1ll : 0ll;
17712 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17713 vceq_u8 (uint8x8_t __a
, uint8x8_t __b
)
17715 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
17719 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17720 vceq_u16 (uint16x4_t __a
, uint16x4_t __b
)
17722 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi ((int16x4_t
) __a
,
17726 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17727 vceq_u32 (uint32x2_t __a
, uint32x2_t __b
)
17729 return (uint32x2_t
) __builtin_aarch64_cmeqv2si ((int32x2_t
) __a
,
17733 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17734 vceq_u64 (uint64x1_t __a
, uint64x1_t __b
)
17736 return __a
== __b
? -1ll : 0ll;
17739 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17740 vceqq_f32 (float32x4_t __a
, float32x4_t __b
)
17742 return (uint32x4_t
) __builtin_aarch64_cmeqv4sf (__a
, __b
);
17745 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17746 vceqq_f64 (float64x2_t __a
, float64x2_t __b
)
17748 return (uint64x2_t
) __builtin_aarch64_cmeqv2df (__a
, __b
);
17751 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17752 vceqq_p8 (poly8x16_t __a
, poly8x16_t __b
)
17754 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
17758 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17759 vceqq_s8 (int8x16_t __a
, int8x16_t __b
)
17761 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi (__a
, __b
);
17764 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17765 vceqq_s16 (int16x8_t __a
, int16x8_t __b
)
17767 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi (__a
, __b
);
17770 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17771 vceqq_s32 (int32x4_t __a
, int32x4_t __b
)
17773 return (uint32x4_t
) __builtin_aarch64_cmeqv4si (__a
, __b
);
17776 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17777 vceqq_s64 (int64x2_t __a
, int64x2_t __b
)
17779 return (uint64x2_t
) __builtin_aarch64_cmeqv2di (__a
, __b
);
17782 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17783 vceqq_u8 (uint8x16_t __a
, uint8x16_t __b
)
17785 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
17789 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17790 vceqq_u16 (uint16x8_t __a
, uint16x8_t __b
)
17792 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi ((int16x8_t
) __a
,
17796 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17797 vceqq_u32 (uint32x4_t __a
, uint32x4_t __b
)
17799 return (uint32x4_t
) __builtin_aarch64_cmeqv4si ((int32x4_t
) __a
,
17803 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17804 vceqq_u64 (uint64x2_t __a
, uint64x2_t __b
)
17806 return (uint64x2_t
) __builtin_aarch64_cmeqv2di ((int64x2_t
) __a
,
17810 /* vceq - scalar. */
17812 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
17813 vceqs_f32 (float32_t __a
, float32_t __b
)
17815 return __a
== __b
? -1 : 0;
17818 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17819 vceqd_s64 (int64x1_t __a
, int64x1_t __b
)
17821 return __a
== __b
? -1ll : 0ll;
17824 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17825 vceqd_u64 (uint64x1_t __a
, uint64x1_t __b
)
17827 return __a
== __b
? -1ll : 0ll;
17830 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
17831 vceqd_f64 (float64_t __a
, float64_t __b
)
17833 return __a
== __b
? -1ll : 0ll;
17836 /* vceqz - vector. */
17838 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17839 vceqz_f32 (float32x2_t __a
)
17841 float32x2_t __b
= {0.0f
, 0.0f
};
17842 return (uint32x2_t
) __builtin_aarch64_cmeqv2sf (__a
, __b
);
17845 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17846 vceqz_f64 (float64x1_t __a
)
17848 return __a
== 0.0 ? -1ll : 0ll;
17851 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17852 vceqz_p8 (poly8x8_t __a
)
17854 poly8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
17855 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
17859 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17860 vceqz_s8 (int8x8_t __a
)
17862 int8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
17863 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi (__a
, __b
);
17866 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17867 vceqz_s16 (int16x4_t __a
)
17869 int16x4_t __b
= {0, 0, 0, 0};
17870 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi (__a
, __b
);
17873 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17874 vceqz_s32 (int32x2_t __a
)
17876 int32x2_t __b
= {0, 0};
17877 return (uint32x2_t
) __builtin_aarch64_cmeqv2si (__a
, __b
);
17880 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17881 vceqz_s64 (int64x1_t __a
)
17883 return __a
== 0ll ? -1ll : 0ll;
17886 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17887 vceqz_u8 (uint8x8_t __a
)
17889 uint8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
17890 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
17894 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17895 vceqz_u16 (uint16x4_t __a
)
17897 uint16x4_t __b
= {0, 0, 0, 0};
17898 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi ((int16x4_t
) __a
,
17902 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17903 vceqz_u32 (uint32x2_t __a
)
17905 uint32x2_t __b
= {0, 0};
17906 return (uint32x2_t
) __builtin_aarch64_cmeqv2si ((int32x2_t
) __a
,
17910 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
17911 vceqz_u64 (uint64x1_t __a
)
17913 return __a
== 0ll ? -1ll : 0ll;
17916 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17917 vceqzq_f32 (float32x4_t __a
)
17919 float32x4_t __b
= {0.0f
, 0.0f
, 0.0f
, 0.0f
};
17920 return (uint32x4_t
) __builtin_aarch64_cmeqv4sf (__a
, __b
);
17923 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17924 vceqzq_f64 (float64x2_t __a
)
17926 float64x2_t __b
= {0.0, 0.0};
17927 return (uint64x2_t
) __builtin_aarch64_cmeqv2df (__a
, __b
);
17930 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17931 vceqzq_p8 (poly8x16_t __a
)
17933 poly8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
17934 0, 0, 0, 0, 0, 0, 0, 0};
17935 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
17939 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17940 vceqzq_s8 (int8x16_t __a
)
17942 int8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
17943 0, 0, 0, 0, 0, 0, 0, 0};
17944 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi (__a
, __b
);
17947 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17948 vceqzq_s16 (int16x8_t __a
)
17950 int16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
17951 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi (__a
, __b
);
17954 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17955 vceqzq_s32 (int32x4_t __a
)
17957 int32x4_t __b
= {0, 0, 0, 0};
17958 return (uint32x4_t
) __builtin_aarch64_cmeqv4si (__a
, __b
);
17961 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17962 vceqzq_s64 (int64x2_t __a
)
17964 int64x2_t __b
= {0, 0};
17965 return (uint64x2_t
) __builtin_aarch64_cmeqv2di (__a
, __b
);
17968 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17969 vceqzq_u8 (uint8x16_t __a
)
17971 uint8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
17972 0, 0, 0, 0, 0, 0, 0, 0};
17973 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
17977 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17978 vceqzq_u16 (uint16x8_t __a
)
17980 uint16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
17981 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi ((int16x8_t
) __a
,
17985 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17986 vceqzq_u32 (uint32x4_t __a
)
17988 uint32x4_t __b
= {0, 0, 0, 0};
17989 return (uint32x4_t
) __builtin_aarch64_cmeqv4si ((int32x4_t
) __a
,
17993 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17994 vceqzq_u64 (uint64x2_t __a
)
17996 uint64x2_t __b
= {0, 0};
17997 return (uint64x2_t
) __builtin_aarch64_cmeqv2di ((int64x2_t
) __a
,
18001 /* vceqz - scalar. */
18003 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18004 vceqzs_f32 (float32_t __a
)
18006 return __a
== 0.0f
? -1 : 0;
18009 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18010 vceqzd_s64 (int64x1_t __a
)
18012 return __a
== 0 ? -1ll : 0ll;
18015 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18016 vceqzd_u64 (int64x1_t __a
)
18018 return __a
== 0 ? -1ll : 0ll;
18021 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18022 vceqzd_f64 (float64_t __a
)
18024 return __a
== 0.0 ? -1ll : 0ll;
18027 /* vcge - vector. */
18029 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18030 vcge_f32 (float32x2_t __a
, float32x2_t __b
)
18032 return (uint32x2_t
) __builtin_aarch64_cmgev2sf (__a
, __b
);
18035 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18036 vcge_f64 (float64x1_t __a
, float64x1_t __b
)
18038 return __a
>= __b
? -1ll : 0ll;
18041 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18042 vcge_p8 (poly8x8_t __a
, poly8x8_t __b
)
18044 return (uint8x8_t
) __builtin_aarch64_cmgev8qi ((int8x8_t
) __a
,
18048 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18049 vcge_s8 (int8x8_t __a
, int8x8_t __b
)
18051 return (uint8x8_t
) __builtin_aarch64_cmgev8qi (__a
, __b
);
18054 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18055 vcge_s16 (int16x4_t __a
, int16x4_t __b
)
18057 return (uint16x4_t
) __builtin_aarch64_cmgev4hi (__a
, __b
);
18060 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18061 vcge_s32 (int32x2_t __a
, int32x2_t __b
)
18063 return (uint32x2_t
) __builtin_aarch64_cmgev2si (__a
, __b
);
18066 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18067 vcge_s64 (int64x1_t __a
, int64x1_t __b
)
18069 return __a
>= __b
? -1ll : 0ll;
18072 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18073 vcge_u8 (uint8x8_t __a
, uint8x8_t __b
)
18075 return (uint8x8_t
) __builtin_aarch64_cmgeuv8qi ((int8x8_t
) __a
,
18079 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18080 vcge_u16 (uint16x4_t __a
, uint16x4_t __b
)
18082 return (uint16x4_t
) __builtin_aarch64_cmgeuv4hi ((int16x4_t
) __a
,
18086 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18087 vcge_u32 (uint32x2_t __a
, uint32x2_t __b
)
18089 return (uint32x2_t
) __builtin_aarch64_cmgeuv2si ((int32x2_t
) __a
,
18093 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18094 vcge_u64 (uint64x1_t __a
, uint64x1_t __b
)
18096 return __a
>= __b
? -1ll : 0ll;
18099 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18100 vcgeq_f32 (float32x4_t __a
, float32x4_t __b
)
18102 return (uint32x4_t
) __builtin_aarch64_cmgev4sf (__a
, __b
);
18105 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18106 vcgeq_f64 (float64x2_t __a
, float64x2_t __b
)
18108 return (uint64x2_t
) __builtin_aarch64_cmgev2df (__a
, __b
);
18111 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18112 vcgeq_p8 (poly8x16_t __a
, poly8x16_t __b
)
18114 return (uint8x16_t
) __builtin_aarch64_cmgev16qi ((int8x16_t
) __a
,
18118 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18119 vcgeq_s8 (int8x16_t __a
, int8x16_t __b
)
18121 return (uint8x16_t
) __builtin_aarch64_cmgev16qi (__a
, __b
);
18124 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18125 vcgeq_s16 (int16x8_t __a
, int16x8_t __b
)
18127 return (uint16x8_t
) __builtin_aarch64_cmgev8hi (__a
, __b
);
18130 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18131 vcgeq_s32 (int32x4_t __a
, int32x4_t __b
)
18133 return (uint32x4_t
) __builtin_aarch64_cmgev4si (__a
, __b
);
18136 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18137 vcgeq_s64 (int64x2_t __a
, int64x2_t __b
)
18139 return (uint64x2_t
) __builtin_aarch64_cmgev2di (__a
, __b
);
18142 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18143 vcgeq_u8 (uint8x16_t __a
, uint8x16_t __b
)
18145 return (uint8x16_t
) __builtin_aarch64_cmgeuv16qi ((int8x16_t
) __a
,
18149 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18150 vcgeq_u16 (uint16x8_t __a
, uint16x8_t __b
)
18152 return (uint16x8_t
) __builtin_aarch64_cmgeuv8hi ((int16x8_t
) __a
,
18156 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18157 vcgeq_u32 (uint32x4_t __a
, uint32x4_t __b
)
18159 return (uint32x4_t
) __builtin_aarch64_cmgeuv4si ((int32x4_t
) __a
,
18163 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18164 vcgeq_u64 (uint64x2_t __a
, uint64x2_t __b
)
18166 return (uint64x2_t
) __builtin_aarch64_cmgeuv2di ((int64x2_t
) __a
,
18170 /* vcge - scalar. */
18172 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18173 vcges_f32 (float32_t __a
, float32_t __b
)
18175 return __a
>= __b
? -1 : 0;
18178 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18179 vcged_s64 (int64x1_t __a
, int64x1_t __b
)
18181 return __a
>= __b
? -1ll : 0ll;
18184 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18185 vcged_u64 (uint64x1_t __a
, uint64x1_t __b
)
18187 return __a
>= __b
? -1ll : 0ll;
18190 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18191 vcged_f64 (float64_t __a
, float64_t __b
)
18193 return __a
>= __b
? -1ll : 0ll;
18196 /* vcgez - vector. */
18198 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18199 vcgez_f32 (float32x2_t __a
)
18201 float32x2_t __b
= {0.0f
, 0.0f
};
18202 return (uint32x2_t
) __builtin_aarch64_cmgev2sf (__a
, __b
);
18205 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18206 vcgez_f64 (float64x1_t __a
)
18208 return __a
>= 0.0 ? -1ll : 0ll;
18211 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18212 vcgez_p8 (poly8x8_t __a
)
18214 poly8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18215 return (uint8x8_t
) __builtin_aarch64_cmgev8qi ((int8x8_t
) __a
,
18219 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18220 vcgez_s8 (int8x8_t __a
)
18222 int8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18223 return (uint8x8_t
) __builtin_aarch64_cmgev8qi (__a
, __b
);
18226 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18227 vcgez_s16 (int16x4_t __a
)
18229 int16x4_t __b
= {0, 0, 0, 0};
18230 return (uint16x4_t
) __builtin_aarch64_cmgev4hi (__a
, __b
);
18233 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18234 vcgez_s32 (int32x2_t __a
)
18236 int32x2_t __b
= {0, 0};
18237 return (uint32x2_t
) __builtin_aarch64_cmgev2si (__a
, __b
);
18240 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18241 vcgez_s64 (int64x1_t __a
)
18243 return __a
>= 0ll ? -1ll : 0ll;
18246 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18247 vcgez_u8 (uint8x8_t __a
)
18249 uint8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18250 return (uint8x8_t
) __builtin_aarch64_cmgeuv8qi ((int8x8_t
) __a
,
18254 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18255 vcgez_u16 (uint16x4_t __a
)
18257 uint16x4_t __b
= {0, 0, 0, 0};
18258 return (uint16x4_t
) __builtin_aarch64_cmgeuv4hi ((int16x4_t
) __a
,
18262 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18263 vcgez_u32 (uint32x2_t __a
)
18265 uint32x2_t __b
= {0, 0};
18266 return (uint32x2_t
) __builtin_aarch64_cmgeuv2si ((int32x2_t
) __a
,
18270 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18271 vcgez_u64 (uint64x1_t __a
)
18273 return __a
>= 0ll ? -1ll : 0ll;
18276 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18277 vcgezq_f32 (float32x4_t __a
)
18279 float32x4_t __b
= {0.0f
, 0.0f
, 0.0f
, 0.0f
};
18280 return (uint32x4_t
) __builtin_aarch64_cmgev4sf (__a
, __b
);
18283 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18284 vcgezq_f64 (float64x2_t __a
)
18286 float64x2_t __b
= {0.0, 0.0};
18287 return (uint64x2_t
) __builtin_aarch64_cmgev2df (__a
, __b
);
18290 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18291 vcgezq_p8 (poly8x16_t __a
)
18293 poly8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18294 0, 0, 0, 0, 0, 0, 0, 0};
18295 return (uint8x16_t
) __builtin_aarch64_cmgev16qi ((int8x16_t
) __a
,
18299 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18300 vcgezq_s8 (int8x16_t __a
)
18302 int8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18303 0, 0, 0, 0, 0, 0, 0, 0};
18304 return (uint8x16_t
) __builtin_aarch64_cmgev16qi (__a
, __b
);
18307 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18308 vcgezq_s16 (int16x8_t __a
)
18310 int16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18311 return (uint16x8_t
) __builtin_aarch64_cmgev8hi (__a
, __b
);
18314 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18315 vcgezq_s32 (int32x4_t __a
)
18317 int32x4_t __b
= {0, 0, 0, 0};
18318 return (uint32x4_t
) __builtin_aarch64_cmgev4si (__a
, __b
);
18321 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18322 vcgezq_s64 (int64x2_t __a
)
18324 int64x2_t __b
= {0, 0};
18325 return (uint64x2_t
) __builtin_aarch64_cmgev2di (__a
, __b
);
18328 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18329 vcgezq_u8 (uint8x16_t __a
)
18331 uint8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18332 0, 0, 0, 0, 0, 0, 0, 0};
18333 return (uint8x16_t
) __builtin_aarch64_cmgeuv16qi ((int8x16_t
) __a
,
18337 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18338 vcgezq_u16 (uint16x8_t __a
)
18340 uint16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18341 return (uint16x8_t
) __builtin_aarch64_cmgeuv8hi ((int16x8_t
) __a
,
18345 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18346 vcgezq_u32 (uint32x4_t __a
)
18348 uint32x4_t __b
= {0, 0, 0, 0};
18349 return (uint32x4_t
) __builtin_aarch64_cmgeuv4si ((int32x4_t
) __a
,
18353 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18354 vcgezq_u64 (uint64x2_t __a
)
18356 uint64x2_t __b
= {0, 0};
18357 return (uint64x2_t
) __builtin_aarch64_cmgeuv2di ((int64x2_t
) __a
,
18361 /* vcgez - scalar. */
18363 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18364 vcgezs_f32 (float32_t __a
)
18366 return __a
>= 0.0f
? -1 : 0;
18369 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18370 vcgezd_s64 (int64x1_t __a
)
18372 return __a
>= 0 ? -1ll : 0ll;
18375 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18376 vcgezd_u64 (int64x1_t __a
)
18378 return __a
>= 0 ? -1ll : 0ll;
18381 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18382 vcgezd_f64 (float64_t __a
)
18384 return __a
>= 0.0 ? -1ll : 0ll;
18387 /* vcgt - vector. */
18389 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18390 vcgt_f32 (float32x2_t __a
, float32x2_t __b
)
18392 return (uint32x2_t
) __builtin_aarch64_cmgtv2sf (__a
, __b
);
18395 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18396 vcgt_f64 (float64x1_t __a
, float64x1_t __b
)
18398 return __a
> __b
? -1ll : 0ll;
18401 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18402 vcgt_p8 (poly8x8_t __a
, poly8x8_t __b
)
18404 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi ((int8x8_t
) __a
,
18408 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18409 vcgt_s8 (int8x8_t __a
, int8x8_t __b
)
18411 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi (__a
, __b
);
18414 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18415 vcgt_s16 (int16x4_t __a
, int16x4_t __b
)
18417 return (uint16x4_t
) __builtin_aarch64_cmgtv4hi (__a
, __b
);
18420 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18421 vcgt_s32 (int32x2_t __a
, int32x2_t __b
)
18423 return (uint32x2_t
) __builtin_aarch64_cmgtv2si (__a
, __b
);
18426 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18427 vcgt_s64 (int64x1_t __a
, int64x1_t __b
)
18429 return __a
> __b
? -1ll : 0ll;
18432 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18433 vcgt_u8 (uint8x8_t __a
, uint8x8_t __b
)
18435 return (uint8x8_t
) __builtin_aarch64_cmgtuv8qi ((int8x8_t
) __a
,
18439 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18440 vcgt_u16 (uint16x4_t __a
, uint16x4_t __b
)
18442 return (uint16x4_t
) __builtin_aarch64_cmgtuv4hi ((int16x4_t
) __a
,
18446 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18447 vcgt_u32 (uint32x2_t __a
, uint32x2_t __b
)
18449 return (uint32x2_t
) __builtin_aarch64_cmgtuv2si ((int32x2_t
) __a
,
18453 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18454 vcgt_u64 (uint64x1_t __a
, uint64x1_t __b
)
18456 return __a
> __b
? -1ll : 0ll;
18459 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18460 vcgtq_f32 (float32x4_t __a
, float32x4_t __b
)
18462 return (uint32x4_t
) __builtin_aarch64_cmgtv4sf (__a
, __b
);
18465 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18466 vcgtq_f64 (float64x2_t __a
, float64x2_t __b
)
18468 return (uint64x2_t
) __builtin_aarch64_cmgtv2df (__a
, __b
);
18471 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18472 vcgtq_p8 (poly8x16_t __a
, poly8x16_t __b
)
18474 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi ((int8x16_t
) __a
,
18478 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18479 vcgtq_s8 (int8x16_t __a
, int8x16_t __b
)
18481 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi (__a
, __b
);
18484 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18485 vcgtq_s16 (int16x8_t __a
, int16x8_t __b
)
18487 return (uint16x8_t
) __builtin_aarch64_cmgtv8hi (__a
, __b
);
18490 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18491 vcgtq_s32 (int32x4_t __a
, int32x4_t __b
)
18493 return (uint32x4_t
) __builtin_aarch64_cmgtv4si (__a
, __b
);
18496 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18497 vcgtq_s64 (int64x2_t __a
, int64x2_t __b
)
18499 return (uint64x2_t
) __builtin_aarch64_cmgtv2di (__a
, __b
);
18502 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18503 vcgtq_u8 (uint8x16_t __a
, uint8x16_t __b
)
18505 return (uint8x16_t
) __builtin_aarch64_cmgtuv16qi ((int8x16_t
) __a
,
18509 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18510 vcgtq_u16 (uint16x8_t __a
, uint16x8_t __b
)
18512 return (uint16x8_t
) __builtin_aarch64_cmgtuv8hi ((int16x8_t
) __a
,
18516 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18517 vcgtq_u32 (uint32x4_t __a
, uint32x4_t __b
)
18519 return (uint32x4_t
) __builtin_aarch64_cmgtuv4si ((int32x4_t
) __a
,
18523 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18524 vcgtq_u64 (uint64x2_t __a
, uint64x2_t __b
)
18526 return (uint64x2_t
) __builtin_aarch64_cmgtuv2di ((int64x2_t
) __a
,
18530 /* vcgt - scalar. */
18532 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18533 vcgts_f32 (float32_t __a
, float32_t __b
)
18535 return __a
> __b
? -1 : 0;
18538 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18539 vcgtd_s64 (int64x1_t __a
, int64x1_t __b
)
18541 return __a
> __b
? -1ll : 0ll;
18544 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18545 vcgtd_u64 (uint64x1_t __a
, uint64x1_t __b
)
18547 return __a
> __b
? -1ll : 0ll;
18550 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18551 vcgtd_f64 (float64_t __a
, float64_t __b
)
18553 return __a
> __b
? -1ll : 0ll;
18556 /* vcgtz - vector. */
18558 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18559 vcgtz_f32 (float32x2_t __a
)
18561 float32x2_t __b
= {0.0f
, 0.0f
};
18562 return (uint32x2_t
) __builtin_aarch64_cmgtv2sf (__a
, __b
);
18565 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18566 vcgtz_f64 (float64x1_t __a
)
18568 return __a
> 0.0 ? -1ll : 0ll;
18571 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18572 vcgtz_p8 (poly8x8_t __a
)
18574 poly8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18575 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi ((int8x8_t
) __a
,
18579 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18580 vcgtz_s8 (int8x8_t __a
)
18582 int8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18583 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi (__a
, __b
);
18586 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18587 vcgtz_s16 (int16x4_t __a
)
18589 int16x4_t __b
= {0, 0, 0, 0};
18590 return (uint16x4_t
) __builtin_aarch64_cmgtv4hi (__a
, __b
);
18593 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18594 vcgtz_s32 (int32x2_t __a
)
18596 int32x2_t __b
= {0, 0};
18597 return (uint32x2_t
) __builtin_aarch64_cmgtv2si (__a
, __b
);
18600 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18601 vcgtz_s64 (int64x1_t __a
)
18603 return __a
> 0ll ? -1ll : 0ll;
18606 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18607 vcgtz_u8 (uint8x8_t __a
)
18609 uint8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18610 return (uint8x8_t
) __builtin_aarch64_cmgtuv8qi ((int8x8_t
) __a
,
18614 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18615 vcgtz_u16 (uint16x4_t __a
)
18617 uint16x4_t __b
= {0, 0, 0, 0};
18618 return (uint16x4_t
) __builtin_aarch64_cmgtuv4hi ((int16x4_t
) __a
,
18622 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18623 vcgtz_u32 (uint32x2_t __a
)
18625 uint32x2_t __b
= {0, 0};
18626 return (uint32x2_t
) __builtin_aarch64_cmgtuv2si ((int32x2_t
) __a
,
18630 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18631 vcgtz_u64 (uint64x1_t __a
)
18633 return __a
> 0ll ? -1ll : 0ll;
18636 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18637 vcgtzq_f32 (float32x4_t __a
)
18639 float32x4_t __b
= {0.0f
, 0.0f
, 0.0f
, 0.0f
};
18640 return (uint32x4_t
) __builtin_aarch64_cmgtv4sf (__a
, __b
);
18643 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18644 vcgtzq_f64 (float64x2_t __a
)
18646 float64x2_t __b
= {0.0, 0.0};
18647 return (uint64x2_t
) __builtin_aarch64_cmgtv2df (__a
, __b
);
18650 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18651 vcgtzq_p8 (poly8x16_t __a
)
18653 poly8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18654 0, 0, 0, 0, 0, 0, 0, 0};
18655 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi ((int8x16_t
) __a
,
18659 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18660 vcgtzq_s8 (int8x16_t __a
)
18662 int8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18663 0, 0, 0, 0, 0, 0, 0, 0};
18664 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi (__a
, __b
);
18667 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18668 vcgtzq_s16 (int16x8_t __a
)
18670 int16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18671 return (uint16x8_t
) __builtin_aarch64_cmgtv8hi (__a
, __b
);
18674 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18675 vcgtzq_s32 (int32x4_t __a
)
18677 int32x4_t __b
= {0, 0, 0, 0};
18678 return (uint32x4_t
) __builtin_aarch64_cmgtv4si (__a
, __b
);
18681 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18682 vcgtzq_s64 (int64x2_t __a
)
18684 int64x2_t __b
= {0, 0};
18685 return (uint64x2_t
) __builtin_aarch64_cmgtv2di (__a
, __b
);
18688 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18689 vcgtzq_u8 (uint8x16_t __a
)
18691 uint8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18692 0, 0, 0, 0, 0, 0, 0, 0};
18693 return (uint8x16_t
) __builtin_aarch64_cmgtuv16qi ((int8x16_t
) __a
,
18697 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18698 vcgtzq_u16 (uint16x8_t __a
)
18700 uint16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18701 return (uint16x8_t
) __builtin_aarch64_cmgtuv8hi ((int16x8_t
) __a
,
18705 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18706 vcgtzq_u32 (uint32x4_t __a
)
18708 uint32x4_t __b
= {0, 0, 0, 0};
18709 return (uint32x4_t
) __builtin_aarch64_cmgtuv4si ((int32x4_t
) __a
,
18713 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18714 vcgtzq_u64 (uint64x2_t __a
)
18716 uint64x2_t __b
= {0, 0};
18717 return (uint64x2_t
) __builtin_aarch64_cmgtuv2di ((int64x2_t
) __a
,
/* vcgtz - scalar.  */
18723 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18724 vcgtzs_f32 (float32_t __a
)
18726 return __a
> 0.0f
? -1 : 0;
18729 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18730 vcgtzd_s64 (int64x1_t __a
)
18732 return __a
> 0 ? -1ll : 0ll;
18735 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18736 vcgtzd_u64 (int64x1_t __a
)
18738 return __a
> 0 ? -1ll : 0ll;
18741 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18742 vcgtzd_f64 (float64_t __a
)
18744 return __a
> 0.0 ? -1ll : 0ll;
/* vcle - vector.  */
18749 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18750 vcle_f32 (float32x2_t __a
, float32x2_t __b
)
18752 return (uint32x2_t
) __builtin_aarch64_cmgev2sf (__b
, __a
);
18755 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18756 vcle_f64 (float64x1_t __a
, float64x1_t __b
)
18758 return __a
<= __b
? -1ll : 0ll;
18761 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18762 vcle_p8 (poly8x8_t __a
, poly8x8_t __b
)
18764 return (uint8x8_t
) __builtin_aarch64_cmgev8qi ((int8x8_t
) __b
,
18768 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18769 vcle_s8 (int8x8_t __a
, int8x8_t __b
)
18771 return (uint8x8_t
) __builtin_aarch64_cmgev8qi (__b
, __a
);
18774 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18775 vcle_s16 (int16x4_t __a
, int16x4_t __b
)
18777 return (uint16x4_t
) __builtin_aarch64_cmgev4hi (__b
, __a
);
18780 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18781 vcle_s32 (int32x2_t __a
, int32x2_t __b
)
18783 return (uint32x2_t
) __builtin_aarch64_cmgev2si (__b
, __a
);
18786 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18787 vcle_s64 (int64x1_t __a
, int64x1_t __b
)
18789 return __a
<= __b
? -1ll : 0ll;
18792 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18793 vcle_u8 (uint8x8_t __a
, uint8x8_t __b
)
18795 return (uint8x8_t
) __builtin_aarch64_cmgeuv8qi ((int8x8_t
) __b
,
18799 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18800 vcle_u16 (uint16x4_t __a
, uint16x4_t __b
)
18802 return (uint16x4_t
) __builtin_aarch64_cmgeuv4hi ((int16x4_t
) __b
,
18806 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18807 vcle_u32 (uint32x2_t __a
, uint32x2_t __b
)
18809 return (uint32x2_t
) __builtin_aarch64_cmgeuv2si ((int32x2_t
) __b
,
18813 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18814 vcle_u64 (uint64x1_t __a
, uint64x1_t __b
)
18816 return __a
<= __b
? -1ll : 0ll;
18819 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18820 vcleq_f32 (float32x4_t __a
, float32x4_t __b
)
18822 return (uint32x4_t
) __builtin_aarch64_cmgev4sf (__b
, __a
);
18825 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18826 vcleq_f64 (float64x2_t __a
, float64x2_t __b
)
18828 return (uint64x2_t
) __builtin_aarch64_cmgev2df (__b
, __a
);
18831 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18832 vcleq_p8 (poly8x16_t __a
, poly8x16_t __b
)
18834 return (uint8x16_t
) __builtin_aarch64_cmgev16qi ((int8x16_t
) __b
,
18838 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18839 vcleq_s8 (int8x16_t __a
, int8x16_t __b
)
18841 return (uint8x16_t
) __builtin_aarch64_cmgev16qi (__b
, __a
);
18844 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18845 vcleq_s16 (int16x8_t __a
, int16x8_t __b
)
18847 return (uint16x8_t
) __builtin_aarch64_cmgev8hi (__b
, __a
);
18850 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18851 vcleq_s32 (int32x4_t __a
, int32x4_t __b
)
18853 return (uint32x4_t
) __builtin_aarch64_cmgev4si (__b
, __a
);
18856 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18857 vcleq_s64 (int64x2_t __a
, int64x2_t __b
)
18859 return (uint64x2_t
) __builtin_aarch64_cmgev2di (__b
, __a
);
18862 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18863 vcleq_u8 (uint8x16_t __a
, uint8x16_t __b
)
18865 return (uint8x16_t
) __builtin_aarch64_cmgeuv16qi ((int8x16_t
) __b
,
18869 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18870 vcleq_u16 (uint16x8_t __a
, uint16x8_t __b
)
18872 return (uint16x8_t
) __builtin_aarch64_cmgeuv8hi ((int16x8_t
) __b
,
18876 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18877 vcleq_u32 (uint32x4_t __a
, uint32x4_t __b
)
18879 return (uint32x4_t
) __builtin_aarch64_cmgeuv4si ((int32x4_t
) __b
,
18883 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18884 vcleq_u64 (uint64x2_t __a
, uint64x2_t __b
)
18886 return (uint64x2_t
) __builtin_aarch64_cmgeuv2di ((int64x2_t
) __b
,
/* vcle - scalar.  */
18892 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18893 vcles_f32 (float32_t __a
, float32_t __b
)
18895 return __a
<= __b
? -1 : 0;
18898 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18899 vcled_s64 (int64x1_t __a
, int64x1_t __b
)
18901 return __a
<= __b
? -1ll : 0ll;
18904 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18905 vcled_u64 (uint64x1_t __a
, uint64x1_t __b
)
18907 return __a
<= __b
? -1ll : 0ll;
18910 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18911 vcled_f64 (float64_t __a
, float64_t __b
)
18913 return __a
<= __b
? -1ll : 0ll;
/* vclez - vector.  */
18918 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18919 vclez_f32 (float32x2_t __a
)
18921 float32x2_t __b
= {0.0f
, 0.0f
};
18922 return (uint32x2_t
) __builtin_aarch64_cmlev2sf (__a
, __b
);
18925 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18926 vclez_f64 (float64x1_t __a
)
18928 return __a
<= 0.0 ? -1ll : 0ll;
18931 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18932 vclez_p8 (poly8x8_t __a
)
18934 poly8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18935 return (uint8x8_t
) __builtin_aarch64_cmlev8qi ((int8x8_t
) __a
,
18939 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18940 vclez_s8 (int8x8_t __a
)
18942 int8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
18943 return (uint8x8_t
) __builtin_aarch64_cmlev8qi (__a
, __b
);
18946 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18947 vclez_s16 (int16x4_t __a
)
18949 int16x4_t __b
= {0, 0, 0, 0};
18950 return (uint16x4_t
) __builtin_aarch64_cmlev4hi (__a
, __b
);
18953 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18954 vclez_s32 (int32x2_t __a
)
18956 int32x2_t __b
= {0, 0};
18957 return (uint32x2_t
) __builtin_aarch64_cmlev2si (__a
, __b
);
18960 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18961 vclez_s64 (int64x1_t __a
)
18963 return __a
<= 0ll ? -1ll : 0ll;
18966 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
18967 vclez_u64 (uint64x1_t __a
)
18969 return __a
<= 0ll ? -1ll : 0ll;
18972 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18973 vclezq_f32 (float32x4_t __a
)
18975 float32x4_t __b
= {0.0f
, 0.0f
, 0.0f
, 0.0f
};
18976 return (uint32x4_t
) __builtin_aarch64_cmlev4sf (__a
, __b
);
18979 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18980 vclezq_f64 (float64x2_t __a
)
18982 float64x2_t __b
= {0.0, 0.0};
18983 return (uint64x2_t
) __builtin_aarch64_cmlev2df (__a
, __b
);
18986 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18987 vclezq_p8 (poly8x16_t __a
)
18989 poly8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18990 0, 0, 0, 0, 0, 0, 0, 0};
18991 return (uint8x16_t
) __builtin_aarch64_cmlev16qi ((int8x16_t
) __a
,
18995 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18996 vclezq_s8 (int8x16_t __a
)
18998 int8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
18999 0, 0, 0, 0, 0, 0, 0, 0};
19000 return (uint8x16_t
) __builtin_aarch64_cmlev16qi (__a
, __b
);
19003 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19004 vclezq_s16 (int16x8_t __a
)
19006 int16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
19007 return (uint16x8_t
) __builtin_aarch64_cmlev8hi (__a
, __b
);
19010 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19011 vclezq_s32 (int32x4_t __a
)
19013 int32x4_t __b
= {0, 0, 0, 0};
19014 return (uint32x4_t
) __builtin_aarch64_cmlev4si (__a
, __b
);
19017 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19018 vclezq_s64 (int64x2_t __a
)
19020 int64x2_t __b
= {0, 0};
19021 return (uint64x2_t
) __builtin_aarch64_cmlev2di (__a
, __b
);
/* vclez - scalar.  */
19026 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19027 vclezs_f32 (float32_t __a
)
19029 return __a
<= 0.0f
? -1 : 0;
19032 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19033 vclezd_s64 (int64x1_t __a
)
19035 return __a
<= 0 ? -1ll : 0ll;
19038 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19039 vclezd_u64 (int64x1_t __a
)
19041 return __a
<= 0 ? -1ll : 0ll;
19044 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19045 vclezd_f64 (float64_t __a
)
19047 return __a
<= 0.0 ? -1ll : 0ll;
/* vclt - vector.  */
19052 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19053 vclt_f32 (float32x2_t __a
, float32x2_t __b
)
19055 return (uint32x2_t
) __builtin_aarch64_cmgtv2sf (__b
, __a
);
19058 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19059 vclt_f64 (float64x1_t __a
, float64x1_t __b
)
19061 return __a
< __b
? -1ll : 0ll;
19064 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19065 vclt_p8 (poly8x8_t __a
, poly8x8_t __b
)
19067 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi ((int8x8_t
) __b
,
19071 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19072 vclt_s8 (int8x8_t __a
, int8x8_t __b
)
19074 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi (__b
, __a
);
19077 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19078 vclt_s16 (int16x4_t __a
, int16x4_t __b
)
19080 return (uint16x4_t
) __builtin_aarch64_cmgtv4hi (__b
, __a
);
19083 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19084 vclt_s32 (int32x2_t __a
, int32x2_t __b
)
19086 return (uint32x2_t
) __builtin_aarch64_cmgtv2si (__b
, __a
);
19089 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19090 vclt_s64 (int64x1_t __a
, int64x1_t __b
)
19092 return __a
< __b
? -1ll : 0ll;
19095 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19096 vclt_u8 (uint8x8_t __a
, uint8x8_t __b
)
19098 return (uint8x8_t
) __builtin_aarch64_cmgtuv8qi ((int8x8_t
) __b
,
19102 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19103 vclt_u16 (uint16x4_t __a
, uint16x4_t __b
)
19105 return (uint16x4_t
) __builtin_aarch64_cmgtuv4hi ((int16x4_t
) __b
,
19109 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19110 vclt_u32 (uint32x2_t __a
, uint32x2_t __b
)
19112 return (uint32x2_t
) __builtin_aarch64_cmgtuv2si ((int32x2_t
) __b
,
19116 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19117 vclt_u64 (uint64x1_t __a
, uint64x1_t __b
)
19119 return __a
< __b
? -1ll : 0ll;
19122 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19123 vcltq_f32 (float32x4_t __a
, float32x4_t __b
)
19125 return (uint32x4_t
) __builtin_aarch64_cmgtv4sf (__b
, __a
);
19128 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19129 vcltq_f64 (float64x2_t __a
, float64x2_t __b
)
19131 return (uint64x2_t
) __builtin_aarch64_cmgtv2df (__b
, __a
);
19134 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19135 vcltq_p8 (poly8x16_t __a
, poly8x16_t __b
)
19137 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi ((int8x16_t
) __b
,
19141 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19142 vcltq_s8 (int8x16_t __a
, int8x16_t __b
)
19144 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi (__b
, __a
);
19147 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19148 vcltq_s16 (int16x8_t __a
, int16x8_t __b
)
19150 return (uint16x8_t
) __builtin_aarch64_cmgtv8hi (__b
, __a
);
19153 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19154 vcltq_s32 (int32x4_t __a
, int32x4_t __b
)
19156 return (uint32x4_t
) __builtin_aarch64_cmgtv4si (__b
, __a
);
19159 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19160 vcltq_s64 (int64x2_t __a
, int64x2_t __b
)
19162 return (uint64x2_t
) __builtin_aarch64_cmgtv2di (__b
, __a
);
19165 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19166 vcltq_u8 (uint8x16_t __a
, uint8x16_t __b
)
19168 return (uint8x16_t
) __builtin_aarch64_cmgtuv16qi ((int8x16_t
) __b
,
19172 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19173 vcltq_u16 (uint16x8_t __a
, uint16x8_t __b
)
19175 return (uint16x8_t
) __builtin_aarch64_cmgtuv8hi ((int16x8_t
) __b
,
19179 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19180 vcltq_u32 (uint32x4_t __a
, uint32x4_t __b
)
19182 return (uint32x4_t
) __builtin_aarch64_cmgtuv4si ((int32x4_t
) __b
,
19186 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19187 vcltq_u64 (uint64x2_t __a
, uint64x2_t __b
)
19189 return (uint64x2_t
) __builtin_aarch64_cmgtuv2di ((int64x2_t
) __b
,
/* vclt - scalar.  */
19195 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19196 vclts_f32 (float32_t __a
, float32_t __b
)
19198 return __a
< __b
? -1 : 0;
19201 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19202 vcltd_s64 (int64x1_t __a
, int64x1_t __b
)
19204 return __a
< __b
? -1ll : 0ll;
19207 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19208 vcltd_u64 (uint64x1_t __a
, uint64x1_t __b
)
19210 return __a
< __b
? -1ll : 0ll;
19213 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19214 vcltd_f64 (float64_t __a
, float64_t __b
)
19216 return __a
< __b
? -1ll : 0ll;
/* vcltz - vector.  */
19221 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19222 vcltz_f32 (float32x2_t __a
)
19224 float32x2_t __b
= {0.0f
, 0.0f
};
19225 return (uint32x2_t
) __builtin_aarch64_cmltv2sf (__a
, __b
);
19228 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19229 vcltz_f64 (float64x1_t __a
)
19231 return __a
< 0.0 ? -1ll : 0ll;
19234 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19235 vcltz_p8 (poly8x8_t __a
)
19237 poly8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
19238 return (uint8x8_t
) __builtin_aarch64_cmltv8qi ((int8x8_t
) __a
,
19242 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19243 vcltz_s8 (int8x8_t __a
)
19245 int8x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
19246 return (uint8x8_t
) __builtin_aarch64_cmltv8qi (__a
, __b
);
19249 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19250 vcltz_s16 (int16x4_t __a
)
19252 int16x4_t __b
= {0, 0, 0, 0};
19253 return (uint16x4_t
) __builtin_aarch64_cmltv4hi (__a
, __b
);
19256 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19257 vcltz_s32 (int32x2_t __a
)
19259 int32x2_t __b
= {0, 0};
19260 return (uint32x2_t
) __builtin_aarch64_cmltv2si (__a
, __b
);
19263 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19264 vcltz_s64 (int64x1_t __a
)
19266 return __a
< 0ll ? -1ll : 0ll;
19269 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19270 vcltzq_f32 (float32x4_t __a
)
19272 float32x4_t __b
= {0.0f
, 0.0f
, 0.0f
, 0.0f
};
19273 return (uint32x4_t
) __builtin_aarch64_cmltv4sf (__a
, __b
);
19276 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19277 vcltzq_f64 (float64x2_t __a
)
19279 float64x2_t __b
= {0.0, 0.0};
19280 return (uint64x2_t
) __builtin_aarch64_cmltv2df (__a
, __b
);
19283 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19284 vcltzq_p8 (poly8x16_t __a
)
19286 poly8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
19287 0, 0, 0, 0, 0, 0, 0, 0};
19288 return (uint8x16_t
) __builtin_aarch64_cmltv16qi ((int8x16_t
) __a
,
19292 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19293 vcltzq_s8 (int8x16_t __a
)
19295 int8x16_t __b
= {0, 0, 0, 0, 0, 0, 0, 0,
19296 0, 0, 0, 0, 0, 0, 0, 0};
19297 return (uint8x16_t
) __builtin_aarch64_cmltv16qi (__a
, __b
);
19300 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19301 vcltzq_s16 (int16x8_t __a
)
19303 int16x8_t __b
= {0, 0, 0, 0, 0, 0, 0, 0};
19304 return (uint16x8_t
) __builtin_aarch64_cmltv8hi (__a
, __b
);
19307 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19308 vcltzq_s32 (int32x4_t __a
)
19310 int32x4_t __b
= {0, 0, 0, 0};
19311 return (uint32x4_t
) __builtin_aarch64_cmltv4si (__a
, __b
);
19314 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19315 vcltzq_s64 (int64x2_t __a
)
19317 int64x2_t __b
= {0, 0};
19318 return (uint64x2_t
) __builtin_aarch64_cmltv2di (__a
, __b
);
/* vcltz - scalar.  */
19323 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19324 vcltzs_f32 (float32_t __a
)
19326 return __a
< 0.0f
? -1 : 0;
19329 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19330 vcltzd_s64 (int64x1_t __a
)
19332 return __a
< 0 ? -1ll : 0ll;
19335 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19336 vcltzd_u64 (int64x1_t __a
)
19338 return __a
< 0 ? -1ll : 0ll;
19341 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19342 vcltzd_f64 (float64_t __a
)
19344 return __a
< 0.0 ? -1ll : 0ll;
/* vcvt (double -> float).  */
19349 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
19350 vcvt_f32_f64 (float64x2_t __a
)
19352 return __builtin_aarch64_float_truncate_lo_v2sf (__a
);
19355 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
19356 vcvt_high_f32_f64 (float32x2_t __a
, float64x2_t __b
)
19358 return __builtin_aarch64_float_truncate_hi_v4sf (__a
, __b
);
/* vcvt (float -> double).  */
19363 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19364 vcvt_f64_f32 (float32x2_t __a
)
19367 return __builtin_aarch64_float_extend_lo_v2df (__a
);
19370 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19371 vcvt_high_f64_f32 (float32x4_t __a
)
19373 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a
);
/* vcvt (<u>int -> float).  */
19378 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
19379 vcvtd_f64_s64 (int64_t __a
)
19381 return (float64_t
) __a
;
19384 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
19385 vcvtd_f64_u64 (uint64_t __a
)
19387 return (float64_t
) __a
;
19390 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
19391 vcvts_f32_s32 (int32_t __a
)
19393 return (float32_t
) __a
;
19396 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
19397 vcvts_f32_u32 (uint32_t __a
)
19399 return (float32_t
) __a
;
19402 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
19403 vcvt_f32_s32 (int32x2_t __a
)
19405 return __builtin_aarch64_floatv2siv2sf (__a
);
19408 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
19409 vcvt_f32_u32 (uint32x2_t __a
)
19411 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t
) __a
);
19414 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
19415 vcvtq_f32_s32 (int32x4_t __a
)
19417 return __builtin_aarch64_floatv4siv4sf (__a
);
19420 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
19421 vcvtq_f32_u32 (uint32x4_t __a
)
19423 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t
) __a
);
19426 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19427 vcvtq_f64_s64 (int64x2_t __a
)
19429 return __builtin_aarch64_floatv2div2df (__a
);
19432 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19433 vcvtq_f64_u64 (uint64x2_t __a
)
19435 return __builtin_aarch64_floatunsv2div2df ((int64x2_t
) __a
);
/* vcvt (float -> <u>int).  */
19440 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
19441 vcvtd_s64_f64 (float64_t __a
)
19443 return (int64_t) __a
;
19446 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19447 vcvtd_u64_f64 (float64_t __a
)
19449 return (uint64_t) __a
;
19452 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
19453 vcvts_s32_f32 (float32_t __a
)
19455 return (int32_t) __a
;
19458 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19459 vcvts_u32_f32 (float32_t __a
)
19461 return (uint32_t) __a
;
19464 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19465 vcvt_s32_f32 (float32x2_t __a
)
19467 return __builtin_aarch64_lbtruncv2sfv2si (__a
);
19470 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19471 vcvt_u32_f32 (float32x2_t __a
)
19473 /* TODO: This cast should go away when builtins have
19474 their correct types. */
19475 return (uint32x2_t
) __builtin_aarch64_lbtruncuv2sfv2si (__a
);
19478 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19479 vcvtq_s32_f32 (float32x4_t __a
)
19481 return __builtin_aarch64_lbtruncv4sfv4si (__a
);
19484 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19485 vcvtq_u32_f32 (float32x4_t __a
)
19487 /* TODO: This cast should go away when builtins have
19488 their correct types. */
19489 return (uint32x4_t
) __builtin_aarch64_lbtruncuv4sfv4si (__a
);
19492 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19493 vcvtq_s64_f64 (float64x2_t __a
)
19495 return __builtin_aarch64_lbtruncv2dfv2di (__a
);
19498 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19499 vcvtq_u64_f64 (float64x2_t __a
)
19501 /* TODO: This cast should go away when builtins have
19502 their correct types. */
19503 return (uint64x2_t
) __builtin_aarch64_lbtruncuv2dfv2di (__a
);
19508 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
19509 vcvtad_s64_f64 (float64_t __a
)
19511 return __builtin_aarch64_lrounddfdi (__a
);
19514 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19515 vcvtad_u64_f64 (float64_t __a
)
19517 return __builtin_aarch64_lroundudfdi (__a
);
19520 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
19521 vcvtas_s32_f32 (float32_t __a
)
19523 return __builtin_aarch64_lroundsfsi (__a
);
19526 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19527 vcvtas_u32_f32 (float32_t __a
)
19529 return __builtin_aarch64_lroundusfsi (__a
);
19532 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19533 vcvta_s32_f32 (float32x2_t __a
)
19535 return __builtin_aarch64_lroundv2sfv2si (__a
);
19538 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19539 vcvta_u32_f32 (float32x2_t __a
)
19541 /* TODO: This cast should go away when builtins have
19542 their correct types. */
19543 return (uint32x2_t
) __builtin_aarch64_lrounduv2sfv2si (__a
);
19546 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19547 vcvtaq_s32_f32 (float32x4_t __a
)
19549 return __builtin_aarch64_lroundv4sfv4si (__a
);
19552 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19553 vcvtaq_u32_f32 (float32x4_t __a
)
19555 /* TODO: This cast should go away when builtins have
19556 their correct types. */
19557 return (uint32x4_t
) __builtin_aarch64_lrounduv4sfv4si (__a
);
19560 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19561 vcvtaq_s64_f64 (float64x2_t __a
)
19563 return __builtin_aarch64_lroundv2dfv2di (__a
);
19566 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19567 vcvtaq_u64_f64 (float64x2_t __a
)
19569 /* TODO: This cast should go away when builtins have
19570 their correct types. */
19571 return (uint64x2_t
) __builtin_aarch64_lrounduv2dfv2di (__a
);
19576 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
19577 vcvtmd_s64_f64 (float64_t __a
)
19579 return __builtin_lfloor (__a
);
19582 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19583 vcvtmd_u64_f64 (float64_t __a
)
19585 return __builtin_aarch64_lfloorudfdi (__a
);
19588 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
19589 vcvtms_s32_f32 (float32_t __a
)
19591 return __builtin_ifloorf (__a
);
19594 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19595 vcvtms_u32_f32 (float32_t __a
)
19597 return __builtin_aarch64_lfloorusfsi (__a
);
19600 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19601 vcvtm_s32_f32 (float32x2_t __a
)
19603 return __builtin_aarch64_lfloorv2sfv2si (__a
);
19606 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19607 vcvtm_u32_f32 (float32x2_t __a
)
19609 /* TODO: This cast should go away when builtins have
19610 their correct types. */
19611 return (uint32x2_t
) __builtin_aarch64_lflooruv2sfv2si (__a
);
19614 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19615 vcvtmq_s32_f32 (float32x4_t __a
)
19617 return __builtin_aarch64_lfloorv4sfv4si (__a
);
19620 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19621 vcvtmq_u32_f32 (float32x4_t __a
)
19623 /* TODO: This cast should go away when builtins have
19624 their correct types. */
19625 return (uint32x4_t
) __builtin_aarch64_lflooruv4sfv4si (__a
);
19628 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19629 vcvtmq_s64_f64 (float64x2_t __a
)
19631 return __builtin_aarch64_lfloorv2dfv2di (__a
);
19634 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19635 vcvtmq_u64_f64 (float64x2_t __a
)
19637 /* TODO: This cast should go away when builtins have
19638 their correct types. */
19639 return (uint64x2_t
) __builtin_aarch64_lflooruv2dfv2di (__a
);
19644 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
19645 vcvtnd_s64_f64 (float64_t __a
)
19647 return __builtin_aarch64_lfrintndfdi (__a
);
19650 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19651 vcvtnd_u64_f64 (float64_t __a
)
19653 return __builtin_aarch64_lfrintnudfdi (__a
);
19656 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
19657 vcvtns_s32_f32 (float32_t __a
)
19659 return __builtin_aarch64_lfrintnsfsi (__a
);
19662 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19663 vcvtns_u32_f32 (float32_t __a
)
19665 return __builtin_aarch64_lfrintnusfsi (__a
);
19668 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19669 vcvtn_s32_f32 (float32x2_t __a
)
19671 return __builtin_aarch64_lfrintnv2sfv2si (__a
);
19674 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19675 vcvtn_u32_f32 (float32x2_t __a
)
19677 /* TODO: This cast should go away when builtins have
19678 their correct types. */
19679 return (uint32x2_t
) __builtin_aarch64_lfrintnuv2sfv2si (__a
);
19682 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19683 vcvtnq_s32_f32 (float32x4_t __a
)
19685 return __builtin_aarch64_lfrintnv4sfv4si (__a
);
19688 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19689 vcvtnq_u32_f32 (float32x4_t __a
)
19691 /* TODO: This cast should go away when builtins have
19692 their correct types. */
19693 return (uint32x4_t
) __builtin_aarch64_lfrintnuv4sfv4si (__a
);
19696 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19697 vcvtnq_s64_f64 (float64x2_t __a
)
19699 return __builtin_aarch64_lfrintnv2dfv2di (__a
);
19702 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19703 vcvtnq_u64_f64 (float64x2_t __a
)
19705 /* TODO: This cast should go away when builtins have
19706 their correct types. */
19707 return (uint64x2_t
) __builtin_aarch64_lfrintnuv2dfv2di (__a
);
19712 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
19713 vcvtpd_s64_f64 (float64_t __a
)
19715 return __builtin_lceil (__a
);
19718 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
19719 vcvtpd_u64_f64 (float64_t __a
)
19721 return __builtin_aarch64_lceiludfdi (__a
);
19724 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
19725 vcvtps_s32_f32 (float32_t __a
)
19727 return __builtin_iceilf (__a
);
19730 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
19731 vcvtps_u32_f32 (float32_t __a
)
19733 return __builtin_aarch64_lceilusfsi (__a
);
19736 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19737 vcvtp_s32_f32 (float32x2_t __a
)
19739 return __builtin_aarch64_lceilv2sfv2si (__a
);
19742 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19743 vcvtp_u32_f32 (float32x2_t __a
)
19745 /* TODO: This cast should go away when builtins have
19746 their correct types. */
19747 return (uint32x2_t
) __builtin_aarch64_lceiluv2sfv2si (__a
);
19750 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19751 vcvtpq_s32_f32 (float32x4_t __a
)
19753 return __builtin_aarch64_lceilv4sfv4si (__a
);
19756 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19757 vcvtpq_u32_f32 (float32x4_t __a
)
19759 /* TODO: This cast should go away when builtins have
19760 their correct types. */
19761 return (uint32x4_t
) __builtin_aarch64_lceiluv4sfv4si (__a
);
19764 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19765 vcvtpq_s64_f64 (float64x2_t __a
)
19767 return __builtin_aarch64_lceilv2dfv2di (__a
);
19770 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19771 vcvtpq_u64_f64 (float64x2_t __a
)
19773 /* TODO: This cast should go away when builtins have
19774 their correct types. */
19775 return (uint64x2_t
) __builtin_aarch64_lceiluv2dfv2di (__a
);
19780 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
19781 vdupb_lane_s8 (int8x16_t a
, int const b
)
19783 return __aarch64_vgetq_lane_s8 (a
, b
);
19786 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
19787 vdupb_lane_u8 (uint8x16_t a
, int const b
)
19789 return __aarch64_vgetq_lane_u8 (a
, b
);
19792 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
19793 vduph_lane_s16 (int16x8_t a
, int const b
)
19795 return __aarch64_vgetq_lane_s16 (a
, b
);
19798 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
19799 vduph_lane_u16 (uint16x8_t a
, int const b
)
19801 return __aarch64_vgetq_lane_u16 (a
, b
);
19804 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
19805 vdups_lane_s32 (int32x4_t a
, int const b
)
19807 return __aarch64_vgetq_lane_s32 (a
, b
);
19810 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
19811 vdups_lane_u32 (uint32x4_t a
, int const b
)
19813 return __aarch64_vgetq_lane_u32 (a
, b
);
19816 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
19817 vdupd_lane_s64 (int64x2_t a
, int const b
)
19819 return __aarch64_vgetq_lane_s64 (a
, b
);
19822 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19823 vdupd_lane_u64 (uint64x2_t a
, int const b
)
19825 return __aarch64_vgetq_lane_u64 (a
, b
);
19830 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
19831 vld1_f32 (const float32_t
*a
)
19833 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf
*) a
);
19836 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
19837 vld1_f64 (const float64_t
*a
)
19842 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19843 vld1_p8 (const poly8_t
*a
)
19846 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi
*) a
);
19849 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
19850 vld1_p16 (const poly16_t
*a
)
19852 return (poly16x4_t
)
19853 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi
*) a
);
19856 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19857 vld1_s8 (const int8_t *a
)
19859 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi
*) a
);
19862 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
19863 vld1_s16 (const int16_t *a
)
19865 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi
*) a
);
19868 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
19869 vld1_s32 (const int32_t *a
)
19871 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si
*) a
);
19874 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
19875 vld1_s64 (const int64_t *a
)
19880 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19881 vld1_u8 (const uint8_t *a
)
19884 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi
*) a
);
19887 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19888 vld1_u16 (const uint16_t *a
)
19890 return (uint16x4_t
)
19891 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi
*) a
);
19894 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19895 vld1_u32 (const uint32_t *a
)
19897 return (uint32x2_t
)
19898 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si
*) a
);
19901 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19902 vld1_u64 (const uint64_t *a
)
19909 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
19910 vld1q_f32 (const float32_t
*a
)
19912 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf
*) a
);
19915 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19916 vld1q_f64 (const float64_t
*a
)
19918 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df
*) a
);
19921 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19922 vld1q_p8 (const poly8_t
*a
)
19924 return (poly8x16_t
)
19925 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi
*) a
);
19928 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
19929 vld1q_p16 (const poly16_t
*a
)
19931 return (poly16x8_t
)
19932 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi
*) a
);
19935 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19936 vld1q_s8 (const int8_t *a
)
19938 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi
*) a
);
19941 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
19942 vld1q_s16 (const int16_t *a
)
19944 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi
*) a
);
19947 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
19948 vld1q_s32 (const int32_t *a
)
19950 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si
*) a
);
19953 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
19954 vld1q_s64 (const int64_t *a
)
19956 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di
*) a
);
19959 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19960 vld1q_u8 (const uint8_t *a
)
19962 return (uint8x16_t
)
19963 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi
*) a
);
19966 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19967 vld1q_u16 (const uint16_t *a
)
19969 return (uint16x8_t
)
19970 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi
*) a
);
19973 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19974 vld1q_u32 (const uint32_t *a
)
19976 return (uint32x4_t
)
19977 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si
*) a
);
19980 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19981 vld1q_u64 (const uint64_t *a
)
19983 return (uint64x2_t
)
19984 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di
*) a
);
19989 __extension__
static __inline int64x1x2_t
__attribute__ ((__always_inline__
))
19990 vld2_s64 (const int64_t * __a
)
19993 __builtin_aarch64_simd_oi __o
;
19994 __o
= __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di
*) __a
);
19995 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 0);
19996 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 1);
20000 __extension__
static __inline uint64x1x2_t
__attribute__ ((__always_inline__
))
20001 vld2_u64 (const uint64_t * __a
)
20004 __builtin_aarch64_simd_oi __o
;
20005 __o
= __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di
*) __a
);
20006 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 0);
20007 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 1);
20011 __extension__
static __inline float64x1x2_t
__attribute__ ((__always_inline__
))
20012 vld2_f64 (const float64_t
* __a
)
20015 __builtin_aarch64_simd_oi __o
;
20016 __o
= __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df
*) __a
);
20017 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregoidf (__o
, 0);
20018 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregoidf (__o
, 1);
20022 __extension__
static __inline int8x8x2_t
__attribute__ ((__always_inline__
))
20023 vld2_s8 (const int8_t * __a
)
20026 __builtin_aarch64_simd_oi __o
;
20027 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20028 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20029 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20033 __extension__
static __inline poly8x8x2_t
__attribute__ ((__always_inline__
))
20034 vld2_p8 (const poly8_t
* __a
)
20037 __builtin_aarch64_simd_oi __o
;
20038 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20039 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20040 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20044 __extension__
static __inline int16x4x2_t
__attribute__ ((__always_inline__
))
20045 vld2_s16 (const int16_t * __a
)
20048 __builtin_aarch64_simd_oi __o
;
20049 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20050 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20051 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20055 __extension__
static __inline poly16x4x2_t
__attribute__ ((__always_inline__
))
20056 vld2_p16 (const poly16_t
* __a
)
20059 __builtin_aarch64_simd_oi __o
;
20060 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20061 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20062 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20066 __extension__
static __inline int32x2x2_t
__attribute__ ((__always_inline__
))
20067 vld2_s32 (const int32_t * __a
)
20070 __builtin_aarch64_simd_oi __o
;
20071 __o
= __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si
*) __a
);
20072 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 0);
20073 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 1);
20077 __extension__
static __inline uint8x8x2_t
__attribute__ ((__always_inline__
))
20078 vld2_u8 (const uint8_t * __a
)
20081 __builtin_aarch64_simd_oi __o
;
20082 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20083 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20084 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20088 __extension__
static __inline uint16x4x2_t
__attribute__ ((__always_inline__
))
20089 vld2_u16 (const uint16_t * __a
)
20092 __builtin_aarch64_simd_oi __o
;
20093 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20094 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20095 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20099 __extension__
static __inline uint32x2x2_t
__attribute__ ((__always_inline__
))
20100 vld2_u32 (const uint32_t * __a
)
20103 __builtin_aarch64_simd_oi __o
;
20104 __o
= __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si
*) __a
);
20105 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 0);
20106 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 1);
20110 __extension__
static __inline float32x2x2_t
__attribute__ ((__always_inline__
))
20111 vld2_f32 (const float32_t
* __a
)
20114 __builtin_aarch64_simd_oi __o
;
20115 __o
= __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
20116 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregoiv2sf (__o
, 0);
20117 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregoiv2sf (__o
, 1);
20121 __extension__
static __inline int8x16x2_t
__attribute__ ((__always_inline__
))
20122 vld2q_s8 (const int8_t * __a
)
20125 __builtin_aarch64_simd_oi __o
;
20126 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20127 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20128 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20132 __extension__
static __inline poly8x16x2_t
__attribute__ ((__always_inline__
))
20133 vld2q_p8 (const poly8_t
* __a
)
20136 __builtin_aarch64_simd_oi __o
;
20137 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20138 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20139 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20143 __extension__
static __inline int16x8x2_t
__attribute__ ((__always_inline__
))
20144 vld2q_s16 (const int16_t * __a
)
20147 __builtin_aarch64_simd_oi __o
;
20148 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20149 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20150 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20154 __extension__
static __inline poly16x8x2_t
__attribute__ ((__always_inline__
))
20155 vld2q_p16 (const poly16_t
* __a
)
20158 __builtin_aarch64_simd_oi __o
;
20159 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20160 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20161 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20165 __extension__
static __inline int32x4x2_t
__attribute__ ((__always_inline__
))
20166 vld2q_s32 (const int32_t * __a
)
20169 __builtin_aarch64_simd_oi __o
;
20170 __o
= __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si
*) __a
);
20171 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 0);
20172 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 1);
20176 __extension__
static __inline int64x2x2_t
__attribute__ ((__always_inline__
))
20177 vld2q_s64 (const int64_t * __a
)
20180 __builtin_aarch64_simd_oi __o
;
20181 __o
= __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di
*) __a
);
20182 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 0);
20183 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 1);
20187 __extension__
static __inline uint8x16x2_t
__attribute__ ((__always_inline__
))
20188 vld2q_u8 (const uint8_t * __a
)
20191 __builtin_aarch64_simd_oi __o
;
20192 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20193 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20194 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20198 __extension__
static __inline uint16x8x2_t
__attribute__ ((__always_inline__
))
20199 vld2q_u16 (const uint16_t * __a
)
20202 __builtin_aarch64_simd_oi __o
;
20203 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20204 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20205 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20209 __extension__
static __inline uint32x4x2_t
__attribute__ ((__always_inline__
))
20210 vld2q_u32 (const uint32_t * __a
)
20213 __builtin_aarch64_simd_oi __o
;
20214 __o
= __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si
*) __a
);
20215 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 0);
20216 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 1);
20220 __extension__
static __inline uint64x2x2_t
__attribute__ ((__always_inline__
))
20221 vld2q_u64 (const uint64_t * __a
)
20224 __builtin_aarch64_simd_oi __o
;
20225 __o
= __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di
*) __a
);
20226 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 0);
20227 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 1);
20231 __extension__
static __inline float32x4x2_t
__attribute__ ((__always_inline__
))
20232 vld2q_f32 (const float32_t
* __a
)
20235 __builtin_aarch64_simd_oi __o
;
20236 __o
= __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
20237 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregoiv4sf (__o
, 0);
20238 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregoiv4sf (__o
, 1);
20242 __extension__
static __inline float64x2x2_t
__attribute__ ((__always_inline__
))
20243 vld2q_f64 (const float64_t
* __a
)
20246 __builtin_aarch64_simd_oi __o
;
20247 __o
= __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df
*) __a
);
20248 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregoiv2df (__o
, 0);
20249 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregoiv2df (__o
, 1);
20253 __extension__
static __inline int64x1x3_t
__attribute__ ((__always_inline__
))
20254 vld3_s64 (const int64_t * __a
)
20257 __builtin_aarch64_simd_ci __o
;
20258 __o
= __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di
*) __a
);
20259 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 0);
20260 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 1);
20261 ret
.val
[2] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 2);
20265 __extension__
static __inline uint64x1x3_t
__attribute__ ((__always_inline__
))
20266 vld3_u64 (const uint64_t * __a
)
20269 __builtin_aarch64_simd_ci __o
;
20270 __o
= __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di
*) __a
);
20271 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 0);
20272 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 1);
20273 ret
.val
[2] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 2);
20277 __extension__
static __inline float64x1x3_t
__attribute__ ((__always_inline__
))
20278 vld3_f64 (const float64_t
* __a
)
20281 __builtin_aarch64_simd_ci __o
;
20282 __o
= __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df
*) __a
);
20283 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 0);
20284 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 1);
20285 ret
.val
[2] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 2);
20289 __extension__
static __inline int8x8x3_t
__attribute__ ((__always_inline__
))
20290 vld3_s8 (const int8_t * __a
)
20293 __builtin_aarch64_simd_ci __o
;
20294 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20295 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20296 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20297 ret
.val
[2] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20301 __extension__
static __inline poly8x8x3_t
__attribute__ ((__always_inline__
))
20302 vld3_p8 (const poly8_t
* __a
)
20305 __builtin_aarch64_simd_ci __o
;
20306 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20307 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20308 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20309 ret
.val
[2] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20313 __extension__
static __inline int16x4x3_t
__attribute__ ((__always_inline__
))
20314 vld3_s16 (const int16_t * __a
)
20317 __builtin_aarch64_simd_ci __o
;
20318 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20319 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20320 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20321 ret
.val
[2] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20325 __extension__
static __inline poly16x4x3_t
__attribute__ ((__always_inline__
))
20326 vld3_p16 (const poly16_t
* __a
)
20329 __builtin_aarch64_simd_ci __o
;
20330 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20331 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20332 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20333 ret
.val
[2] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20337 __extension__
static __inline int32x2x3_t
__attribute__ ((__always_inline__
))
20338 vld3_s32 (const int32_t * __a
)
20341 __builtin_aarch64_simd_ci __o
;
20342 __o
= __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si
*) __a
);
20343 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 0);
20344 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 1);
20345 ret
.val
[2] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 2);
20349 __extension__
static __inline uint8x8x3_t
__attribute__ ((__always_inline__
))
20350 vld3_u8 (const uint8_t * __a
)
20353 __builtin_aarch64_simd_ci __o
;
20354 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20355 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20356 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20357 ret
.val
[2] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20361 __extension__
static __inline uint16x4x3_t
__attribute__ ((__always_inline__
))
20362 vld3_u16 (const uint16_t * __a
)
20365 __builtin_aarch64_simd_ci __o
;
20366 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20367 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20368 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20369 ret
.val
[2] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20373 __extension__
static __inline uint32x2x3_t
__attribute__ ((__always_inline__
))
20374 vld3_u32 (const uint32_t * __a
)
20377 __builtin_aarch64_simd_ci __o
;
20378 __o
= __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si
*) __a
);
20379 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 0);
20380 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 1);
20381 ret
.val
[2] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 2);
20385 __extension__
static __inline float32x2x3_t
__attribute__ ((__always_inline__
))
20386 vld3_f32 (const float32_t
* __a
)
20389 __builtin_aarch64_simd_ci __o
;
20390 __o
= __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
20391 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 0);
20392 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 1);
20393 ret
.val
[2] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 2);
20397 __extension__
static __inline int8x16x3_t
__attribute__ ((__always_inline__
))
20398 vld3q_s8 (const int8_t * __a
)
20401 __builtin_aarch64_simd_ci __o
;
20402 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20403 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20404 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20405 ret
.val
[2] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20409 __extension__
static __inline poly8x16x3_t
__attribute__ ((__always_inline__
))
20410 vld3q_p8 (const poly8_t
* __a
)
20413 __builtin_aarch64_simd_ci __o
;
20414 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20415 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20416 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20417 ret
.val
[2] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20421 __extension__
static __inline int16x8x3_t
__attribute__ ((__always_inline__
))
20422 vld3q_s16 (const int16_t * __a
)
20425 __builtin_aarch64_simd_ci __o
;
20426 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20427 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20428 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20429 ret
.val
[2] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20433 __extension__
static __inline poly16x8x3_t
__attribute__ ((__always_inline__
))
20434 vld3q_p16 (const poly16_t
* __a
)
20437 __builtin_aarch64_simd_ci __o
;
20438 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20439 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20440 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20441 ret
.val
[2] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20445 __extension__
static __inline int32x4x3_t
__attribute__ ((__always_inline__
))
20446 vld3q_s32 (const int32_t * __a
)
20449 __builtin_aarch64_simd_ci __o
;
20450 __o
= __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si
*) __a
);
20451 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 0);
20452 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 1);
20453 ret
.val
[2] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 2);
20457 __extension__
static __inline int64x2x3_t
__attribute__ ((__always_inline__
))
20458 vld3q_s64 (const int64_t * __a
)
20461 __builtin_aarch64_simd_ci __o
;
20462 __o
= __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di
*) __a
);
20463 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 0);
20464 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 1);
20465 ret
.val
[2] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 2);
20469 __extension__
static __inline uint8x16x3_t
__attribute__ ((__always_inline__
))
20470 vld3q_u8 (const uint8_t * __a
)
20473 __builtin_aarch64_simd_ci __o
;
20474 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20475 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20476 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20477 ret
.val
[2] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20481 __extension__
static __inline uint16x8x3_t
__attribute__ ((__always_inline__
))
20482 vld3q_u16 (const uint16_t * __a
)
20485 __builtin_aarch64_simd_ci __o
;
20486 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20487 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20488 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20489 ret
.val
[2] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20493 __extension__
static __inline uint32x4x3_t
__attribute__ ((__always_inline__
))
20494 vld3q_u32 (const uint32_t * __a
)
20497 __builtin_aarch64_simd_ci __o
;
20498 __o
= __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si
*) __a
);
20499 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 0);
20500 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 1);
20501 ret
.val
[2] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 2);
20505 __extension__
static __inline uint64x2x3_t
__attribute__ ((__always_inline__
))
20506 vld3q_u64 (const uint64_t * __a
)
20509 __builtin_aarch64_simd_ci __o
;
20510 __o
= __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di
*) __a
);
20511 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 0);
20512 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 1);
20513 ret
.val
[2] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 2);
20517 __extension__
static __inline float32x4x3_t
__attribute__ ((__always_inline__
))
20518 vld3q_f32 (const float32_t
* __a
)
20521 __builtin_aarch64_simd_ci __o
;
20522 __o
= __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
20523 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 0);
20524 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 1);
20525 ret
.val
[2] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 2);
20529 __extension__
static __inline float64x2x3_t
__attribute__ ((__always_inline__
))
20530 vld3q_f64 (const float64_t
* __a
)
20533 __builtin_aarch64_simd_ci __o
;
20534 __o
= __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df
*) __a
);
20535 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 0);
20536 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 1);
20537 ret
.val
[2] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 2);
20541 __extension__
static __inline int64x1x4_t
__attribute__ ((__always_inline__
))
20542 vld4_s64 (const int64_t * __a
)
20545 __builtin_aarch64_simd_xi __o
;
20546 __o
= __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di
*) __a
);
20547 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 0);
20548 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 1);
20549 ret
.val
[2] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 2);
20550 ret
.val
[3] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 3);
20554 __extension__
static __inline uint64x1x4_t
__attribute__ ((__always_inline__
))
20555 vld4_u64 (const uint64_t * __a
)
20558 __builtin_aarch64_simd_xi __o
;
20559 __o
= __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di
*) __a
);
20560 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 0);
20561 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 1);
20562 ret
.val
[2] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 2);
20563 ret
.val
[3] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 3);
20567 __extension__
static __inline float64x1x4_t
__attribute__ ((__always_inline__
))
20568 vld4_f64 (const float64_t
* __a
)
20571 __builtin_aarch64_simd_xi __o
;
20572 __o
= __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df
*) __a
);
20573 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 0);
20574 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 1);
20575 ret
.val
[2] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 2);
20576 ret
.val
[3] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 3);
20580 __extension__
static __inline int8x8x4_t
__attribute__ ((__always_inline__
))
20581 vld4_s8 (const int8_t * __a
)
20584 __builtin_aarch64_simd_xi __o
;
20585 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20586 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
20587 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
20588 ret
.val
[2] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
20589 ret
.val
[3] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
20593 __extension__
static __inline poly8x8x4_t
__attribute__ ((__always_inline__
))
20594 vld4_p8 (const poly8_t
* __a
)
20597 __builtin_aarch64_simd_xi __o
;
20598 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20599 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
20600 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
20601 ret
.val
[2] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
20602 ret
.val
[3] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
20606 __extension__
static __inline int16x4x4_t
__attribute__ ((__always_inline__
))
20607 vld4_s16 (const int16_t * __a
)
20610 __builtin_aarch64_simd_xi __o
;
20611 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20612 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
20613 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
20614 ret
.val
[2] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
20615 ret
.val
[3] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
20619 __extension__
static __inline poly16x4x4_t
__attribute__ ((__always_inline__
))
20620 vld4_p16 (const poly16_t
* __a
)
20623 __builtin_aarch64_simd_xi __o
;
20624 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20625 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
20626 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
20627 ret
.val
[2] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
20628 ret
.val
[3] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
20632 __extension__
static __inline int32x2x4_t
__attribute__ ((__always_inline__
))
20633 vld4_s32 (const int32_t * __a
)
20636 __builtin_aarch64_simd_xi __o
;
20637 __o
= __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si
*) __a
);
20638 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 0);
20639 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 1);
20640 ret
.val
[2] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 2);
20641 ret
.val
[3] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 3);
20645 __extension__
static __inline uint8x8x4_t
__attribute__ ((__always_inline__
))
20646 vld4_u8 (const uint8_t * __a
)
20649 __builtin_aarch64_simd_xi __o
;
20650 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20651 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
20652 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
20653 ret
.val
[2] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
20654 ret
.val
[3] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
20658 __extension__
static __inline uint16x4x4_t
__attribute__ ((__always_inline__
))
20659 vld4_u16 (const uint16_t * __a
)
20662 __builtin_aarch64_simd_xi __o
;
20663 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20664 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
20665 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
20666 ret
.val
[2] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
20667 ret
.val
[3] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
20671 __extension__
static __inline uint32x2x4_t
__attribute__ ((__always_inline__
))
20672 vld4_u32 (const uint32_t * __a
)
20675 __builtin_aarch64_simd_xi __o
;
20676 __o
= __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si
*) __a
);
20677 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 0);
20678 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 1);
20679 ret
.val
[2] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 2);
20680 ret
.val
[3] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 3);
20684 __extension__
static __inline float32x2x4_t
__attribute__ ((__always_inline__
))
20685 vld4_f32 (const float32_t
* __a
)
20688 __builtin_aarch64_simd_xi __o
;
20689 __o
= __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
20690 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 0);
20691 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 1);
20692 ret
.val
[2] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 2);
20693 ret
.val
[3] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 3);
20697 __extension__
static __inline int8x16x4_t
__attribute__ ((__always_inline__
))
20698 vld4q_s8 (const int8_t * __a
)
20701 __builtin_aarch64_simd_xi __o
;
20702 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20703 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
20704 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
20705 ret
.val
[2] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
20706 ret
.val
[3] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
20710 __extension__
static __inline poly8x16x4_t
__attribute__ ((__always_inline__
))
20711 vld4q_p8 (const poly8_t
* __a
)
20714 __builtin_aarch64_simd_xi __o
;
20715 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20716 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
20717 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
20718 ret
.val
[2] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
20719 ret
.val
[3] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
20723 __extension__
static __inline int16x8x4_t
__attribute__ ((__always_inline__
))
20724 vld4q_s16 (const int16_t * __a
)
20727 __builtin_aarch64_simd_xi __o
;
20728 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20729 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
20730 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
20731 ret
.val
[2] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
20732 ret
.val
[3] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
20736 __extension__
static __inline poly16x8x4_t
__attribute__ ((__always_inline__
))
20737 vld4q_p16 (const poly16_t
* __a
)
20740 __builtin_aarch64_simd_xi __o
;
20741 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20742 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
20743 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
20744 ret
.val
[2] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
20745 ret
.val
[3] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
20749 __extension__
static __inline int32x4x4_t
__attribute__ ((__always_inline__
))
20750 vld4q_s32 (const int32_t * __a
)
20753 __builtin_aarch64_simd_xi __o
;
20754 __o
= __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si
*) __a
);
20755 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 0);
20756 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 1);
20757 ret
.val
[2] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 2);
20758 ret
.val
[3] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 3);
20762 __extension__
static __inline int64x2x4_t
__attribute__ ((__always_inline__
))
20763 vld4q_s64 (const int64_t * __a
)
20766 __builtin_aarch64_simd_xi __o
;
20767 __o
= __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di
*) __a
);
20768 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 0);
20769 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 1);
20770 ret
.val
[2] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 2);
20771 ret
.val
[3] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 3);
20775 __extension__
static __inline uint8x16x4_t
__attribute__ ((__always_inline__
))
20776 vld4q_u8 (const uint8_t * __a
)
20779 __builtin_aarch64_simd_xi __o
;
20780 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20781 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
20782 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
20783 ret
.val
[2] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
20784 ret
.val
[3] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
20788 __extension__
static __inline uint16x8x4_t
__attribute__ ((__always_inline__
))
20789 vld4q_u16 (const uint16_t * __a
)
20792 __builtin_aarch64_simd_xi __o
;
20793 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20794 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
20795 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
20796 ret
.val
[2] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
20797 ret
.val
[3] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
20801 __extension__
static __inline uint32x4x4_t
__attribute__ ((__always_inline__
))
20802 vld4q_u32 (const uint32_t * __a
)
20805 __builtin_aarch64_simd_xi __o
;
20806 __o
= __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si
*) __a
);
20807 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 0);
20808 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 1);
20809 ret
.val
[2] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 2);
20810 ret
.val
[3] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 3);
20814 __extension__
static __inline uint64x2x4_t
__attribute__ ((__always_inline__
))
20815 vld4q_u64 (const uint64_t * __a
)
20818 __builtin_aarch64_simd_xi __o
;
20819 __o
= __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di
*) __a
);
20820 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 0);
20821 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 1);
20822 ret
.val
[2] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 2);
20823 ret
.val
[3] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 3);
20827 __extension__
static __inline float32x4x4_t
__attribute__ ((__always_inline__
))
20828 vld4q_f32 (const float32_t
* __a
)
20831 __builtin_aarch64_simd_xi __o
;
20832 __o
= __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
20833 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 0);
20834 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 1);
20835 ret
.val
[2] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 2);
20836 ret
.val
[3] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 3);
20840 __extension__
static __inline float64x2x4_t
__attribute__ ((__always_inline__
))
20841 vld4q_f64 (const float64_t
* __a
)
20844 __builtin_aarch64_simd_xi __o
;
20845 __o
= __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df
*) __a
);
20846 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 0);
20847 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 1);
20848 ret
.val
[2] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 2);
20849 ret
.val
[3] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 3);
20855 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
20856 vmax_f32 (float32x2_t __a
, float32x2_t __b
)
20858 return __builtin_aarch64_smax_nanv2sf (__a
, __b
);
20861 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
20862 vmax_s8 (int8x8_t __a
, int8x8_t __b
)
20864 return __builtin_aarch64_smaxv8qi (__a
, __b
);
20867 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
20868 vmax_s16 (int16x4_t __a
, int16x4_t __b
)
20870 return __builtin_aarch64_smaxv4hi (__a
, __b
);
20873 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
20874 vmax_s32 (int32x2_t __a
, int32x2_t __b
)
20876 return __builtin_aarch64_smaxv2si (__a
, __b
);
20879 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20880 vmax_u8 (uint8x8_t __a
, uint8x8_t __b
)
20882 return (uint8x8_t
) __builtin_aarch64_umaxv8qi ((int8x8_t
) __a
,
20886 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20887 vmax_u16 (uint16x4_t __a
, uint16x4_t __b
)
20889 return (uint16x4_t
) __builtin_aarch64_umaxv4hi ((int16x4_t
) __a
,
20893 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20894 vmax_u32 (uint32x2_t __a
, uint32x2_t __b
)
20896 return (uint32x2_t
) __builtin_aarch64_umaxv2si ((int32x2_t
) __a
,
20900 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
20901 vmaxq_f32 (float32x4_t __a
, float32x4_t __b
)
20903 return __builtin_aarch64_smax_nanv4sf (__a
, __b
);
20906 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
20907 vmaxq_f64 (float64x2_t __a
, float64x2_t __b
)
20909 return __builtin_aarch64_smax_nanv2df (__a
, __b
);
20912 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
20913 vmaxq_s8 (int8x16_t __a
, int8x16_t __b
)
20915 return __builtin_aarch64_smaxv16qi (__a
, __b
);
20918 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
20919 vmaxq_s16 (int16x8_t __a
, int16x8_t __b
)
20921 return __builtin_aarch64_smaxv8hi (__a
, __b
);
20924 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
20925 vmaxq_s32 (int32x4_t __a
, int32x4_t __b
)
20927 return __builtin_aarch64_smaxv4si (__a
, __b
);
20930 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20931 vmaxq_u8 (uint8x16_t __a
, uint8x16_t __b
)
20933 return (uint8x16_t
) __builtin_aarch64_umaxv16qi ((int8x16_t
) __a
,
20937 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20938 vmaxq_u16 (uint16x8_t __a
, uint16x8_t __b
)
20940 return (uint16x8_t
) __builtin_aarch64_umaxv8hi ((int16x8_t
) __a
,
20944 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20945 vmaxq_u32 (uint32x4_t __a
, uint32x4_t __b
)
20947 return (uint32x4_t
) __builtin_aarch64_umaxv4si ((int32x4_t
) __a
,
20953 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
20954 vmaxnm_f32 (float32x2_t __a
, float32x2_t __b
)
20956 return __builtin_aarch64_smaxv2sf (__a
, __b
);
20959 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
20960 vmaxnmq_f32 (float32x4_t __a
, float32x4_t __b
)
20962 return __builtin_aarch64_smaxv4sf (__a
, __b
);
20965 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
20966 vmaxnmq_f64 (float64x2_t __a
, float64x2_t __b
)
20968 return __builtin_aarch64_smaxv2df (__a
, __b
);
20973 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
20974 vmaxv_f32 (float32x2_t __a
)
20976 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a
), 0);
20979 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
20980 vmaxv_s8 (int8x8_t __a
)
20982 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a
), 0);
20985 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
20986 vmaxv_s16 (int16x4_t __a
)
20988 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a
), 0);
20991 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
20992 vmaxv_s32 (int32x2_t __a
)
20994 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a
), 0);
20997 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
20998 vmaxv_u8 (uint8x8_t __a
)
21000 return vget_lane_u8 ((uint8x8_t
)
21001 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t
) __a
), 0);
21004 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
21005 vmaxv_u16 (uint16x4_t __a
)
21007 return vget_lane_u16 ((uint16x4_t
)
21008 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t
) __a
), 0);
21011 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
21012 vmaxv_u32 (uint32x2_t __a
)
21014 return vget_lane_u32 ((uint32x2_t
)
21015 __builtin_aarch64_reduc_umax_v2si ((int32x2_t
) __a
), 0);
21018 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21019 vmaxvq_f32 (float32x4_t __a
)
21021 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a
), 0);
21024 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
21025 vmaxvq_f64 (float64x2_t __a
)
21027 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a
), 0);
21030 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
21031 vmaxvq_s8 (int8x16_t __a
)
21033 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a
), 0);
21036 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
21037 vmaxvq_s16 (int16x8_t __a
)
21039 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a
), 0);
21042 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
21043 vmaxvq_s32 (int32x4_t __a
)
21045 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a
), 0);
21048 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
21049 vmaxvq_u8 (uint8x16_t __a
)
21051 return vgetq_lane_u8 ((uint8x16_t
)
21052 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t
) __a
), 0);
21055 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
21056 vmaxvq_u16 (uint16x8_t __a
)
21058 return vgetq_lane_u16 ((uint16x8_t
)
21059 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t
) __a
), 0);
21062 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
21063 vmaxvq_u32 (uint32x4_t __a
)
21065 return vgetq_lane_u32 ((uint32x4_t
)
21066 __builtin_aarch64_reduc_umax_v4si ((int32x4_t
) __a
), 0);
21071 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21072 vmaxnmv_f32 (float32x2_t __a
)
21074 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a
), 0);
21077 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21078 vmaxnmvq_f32 (float32x4_t __a
)
21080 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a
), 0);
21083 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
21084 vmaxnmvq_f64 (float64x2_t __a
)
21086 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a
), 0);
21091 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21092 vmin_f32 (float32x2_t __a
, float32x2_t __b
)
21094 return __builtin_aarch64_smin_nanv2sf (__a
, __b
);
21097 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
21098 vmin_s8 (int8x8_t __a
, int8x8_t __b
)
21100 return __builtin_aarch64_sminv8qi (__a
, __b
);
21103 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21104 vmin_s16 (int16x4_t __a
, int16x4_t __b
)
21106 return __builtin_aarch64_sminv4hi (__a
, __b
);
21109 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21110 vmin_s32 (int32x2_t __a
, int32x2_t __b
)
21112 return __builtin_aarch64_sminv2si (__a
, __b
);
21115 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
21116 vmin_u8 (uint8x8_t __a
, uint8x8_t __b
)
21118 return (uint8x8_t
) __builtin_aarch64_uminv8qi ((int8x8_t
) __a
,
21122 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
21123 vmin_u16 (uint16x4_t __a
, uint16x4_t __b
)
21125 return (uint16x4_t
) __builtin_aarch64_uminv4hi ((int16x4_t
) __a
,
21129 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
21130 vmin_u32 (uint32x2_t __a
, uint32x2_t __b
)
21132 return (uint32x2_t
) __builtin_aarch64_uminv2si ((int32x2_t
) __a
,
21136 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21137 vminq_f32 (float32x4_t __a
, float32x4_t __b
)
21139 return __builtin_aarch64_smin_nanv4sf (__a
, __b
);
21142 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21143 vminq_f64 (float64x2_t __a
, float64x2_t __b
)
21145 return __builtin_aarch64_smin_nanv2df (__a
, __b
);
21148 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
21149 vminq_s8 (int8x16_t __a
, int8x16_t __b
)
21151 return __builtin_aarch64_sminv16qi (__a
, __b
);
21154 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
21155 vminq_s16 (int16x8_t __a
, int16x8_t __b
)
21157 return __builtin_aarch64_sminv8hi (__a
, __b
);
21160 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21161 vminq_s32 (int32x4_t __a
, int32x4_t __b
)
21163 return __builtin_aarch64_sminv4si (__a
, __b
);
21166 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
21167 vminq_u8 (uint8x16_t __a
, uint8x16_t __b
)
21169 return (uint8x16_t
) __builtin_aarch64_uminv16qi ((int8x16_t
) __a
,
21173 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
21174 vminq_u16 (uint16x8_t __a
, uint16x8_t __b
)
21176 return (uint16x8_t
) __builtin_aarch64_uminv8hi ((int16x8_t
) __a
,
21180 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
21181 vminq_u32 (uint32x4_t __a
, uint32x4_t __b
)
21183 return (uint32x4_t
) __builtin_aarch64_uminv4si ((int32x4_t
) __a
,
21189 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21190 vminnm_f32 (float32x2_t __a
, float32x2_t __b
)
21192 return __builtin_aarch64_sminv2sf (__a
, __b
);
21195 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21196 vminnmq_f32 (float32x4_t __a
, float32x4_t __b
)
21198 return __builtin_aarch64_sminv4sf (__a
, __b
);
21201 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21202 vminnmq_f64 (float64x2_t __a
, float64x2_t __b
)
21204 return __builtin_aarch64_sminv2df (__a
, __b
);
21209 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21210 vminv_f32 (float32x2_t __a
)
21212 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a
), 0);
21215 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
21216 vminv_s8 (int8x8_t __a
)
21218 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a
), 0);
21221 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
21222 vminv_s16 (int16x4_t __a
)
21224 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a
), 0);
21227 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
21228 vminv_s32 (int32x2_t __a
)
21230 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a
), 0);
21233 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
21234 vminv_u8 (uint8x8_t __a
)
21236 return vget_lane_u8 ((uint8x8_t
)
21237 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t
) __a
), 0);
21240 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
21241 vminv_u16 (uint16x4_t __a
)
21243 return vget_lane_u16 ((uint16x4_t
)
21244 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t
) __a
), 0);
21247 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
21248 vminv_u32 (uint32x2_t __a
)
21250 return vget_lane_u32 ((uint32x2_t
)
21251 __builtin_aarch64_reduc_umin_v2si ((int32x2_t
) __a
), 0);
21254 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21255 vminvq_f32 (float32x4_t __a
)
21257 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a
), 0);
21260 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
21261 vminvq_f64 (float64x2_t __a
)
21263 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a
), 0);
21266 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
21267 vminvq_s8 (int8x16_t __a
)
21269 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a
), 0);
21272 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
21273 vminvq_s16 (int16x8_t __a
)
21275 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a
), 0);
21278 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
21279 vminvq_s32 (int32x4_t __a
)
21281 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a
), 0);
21284 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
21285 vminvq_u8 (uint8x16_t __a
)
21287 return vgetq_lane_u8 ((uint8x16_t
)
21288 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t
) __a
), 0);
21291 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
21292 vminvq_u16 (uint16x8_t __a
)
21294 return vgetq_lane_u16 ((uint16x8_t
)
21295 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t
) __a
), 0);
21298 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
21299 vminvq_u32 (uint32x4_t __a
)
21301 return vgetq_lane_u32 ((uint32x4_t
)
21302 __builtin_aarch64_reduc_umin_v4si ((int32x4_t
) __a
), 0);
21307 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21308 vminnmv_f32 (float32x2_t __a
)
21310 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a
), 0);
21313 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
21314 vminnmvq_f32 (float32x4_t __a
)
21316 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a
), 0);
21319 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
21320 vminnmvq_f64 (float64x2_t __a
)
21322 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a
), 0);
21327 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21328 vmla_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
21333 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21334 vmlaq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
21339 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21340 vmlaq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
21345 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21346 vmls_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
21351 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21352 vmlsq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
21357 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21358 vmlsq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
21365 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21366 vqabsq_s64 (int64x2_t __a
)
21368 return (int64x2_t
) __builtin_aarch64_sqabsv2di (__a
);
21371 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21372 vqabsb_s8 (int8x1_t __a
)
21374 return (int8x1_t
) __builtin_aarch64_sqabsqi (__a
);
21377 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21378 vqabsh_s16 (int16x1_t __a
)
21380 return (int16x1_t
) __builtin_aarch64_sqabshi (__a
);
21383 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21384 vqabss_s32 (int32x1_t __a
)
21386 return (int32x1_t
) __builtin_aarch64_sqabssi (__a
);
21391 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21392 vqaddb_s8 (int8x1_t __a
, int8x1_t __b
)
21394 return (int8x1_t
) __builtin_aarch64_sqaddqi (__a
, __b
);
21397 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21398 vqaddh_s16 (int16x1_t __a
, int16x1_t __b
)
21400 return (int16x1_t
) __builtin_aarch64_sqaddhi (__a
, __b
);
21403 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21404 vqadds_s32 (int32x1_t __a
, int32x1_t __b
)
21406 return (int32x1_t
) __builtin_aarch64_sqaddsi (__a
, __b
);
21409 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21410 vqaddd_s64 (int64x1_t __a
, int64x1_t __b
)
21412 return (int64x1_t
) __builtin_aarch64_sqadddi (__a
, __b
);
21415 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
21416 vqaddb_u8 (uint8x1_t __a
, uint8x1_t __b
)
21418 return (uint8x1_t
) __builtin_aarch64_uqaddqi (__a
, __b
);
21421 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
21422 vqaddh_u16 (uint16x1_t __a
, uint16x1_t __b
)
21424 return (uint16x1_t
) __builtin_aarch64_uqaddhi (__a
, __b
);
21427 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
21428 vqadds_u32 (uint32x1_t __a
, uint32x1_t __b
)
21430 return (uint32x1_t
) __builtin_aarch64_uqaddsi (__a
, __b
);
21433 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
21434 vqaddd_u64 (uint64x1_t __a
, uint64x1_t __b
)
21436 return (uint64x1_t
) __builtin_aarch64_uqadddi (__a
, __b
);
21441 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21442 vqdmlal_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
)
21444 return __builtin_aarch64_sqdmlalv4hi (__a
, __b
, __c
);
21447 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21448 vqdmlal_high_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
)
21450 return __builtin_aarch64_sqdmlal2v8hi (__a
, __b
, __c
);
21453 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21454 vqdmlal_high_lane_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21457 return __builtin_aarch64_sqdmlal2_lanev8hi (__a
, __b
, __c
, __d
);
21460 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21461 vqdmlal_high_laneq_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21464 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a
, __b
, __c
, __d
);
21467 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21468 vqdmlal_high_n_s16 (int32x4_t __a
, int16x8_t __b
, int16_t __c
)
21470 return __builtin_aarch64_sqdmlal2_nv8hi (__a
, __b
, __c
);
21473 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21474 vqdmlal_lane_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
, int const __d
)
21476 int16x8_t __tmp
= vcombine_s16 (__c
, vcreate_s16 (INT64_C (0)));
21477 return __builtin_aarch64_sqdmlal_lanev4hi (__a
, __b
, __tmp
, __d
);
21480 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21481 vqdmlal_laneq_s16 (int32x4_t __a
, int16x4_t __b
, int16x8_t __c
, int const __d
)
21483 return __builtin_aarch64_sqdmlal_laneqv4hi (__a
, __b
, __c
, __d
);
21486 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21487 vqdmlal_n_s16 (int32x4_t __a
, int16x4_t __b
, int16_t __c
)
21489 return __builtin_aarch64_sqdmlal_nv4hi (__a
, __b
, __c
);
21492 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21493 vqdmlal_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
)
21495 return __builtin_aarch64_sqdmlalv2si (__a
, __b
, __c
);
21498 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21499 vqdmlal_high_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
)
21501 return __builtin_aarch64_sqdmlal2v4si (__a
, __b
, __c
);
21504 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21505 vqdmlal_high_lane_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21508 return __builtin_aarch64_sqdmlal2_lanev4si (__a
, __b
, __c
, __d
);
21511 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21512 vqdmlal_high_laneq_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21515 return __builtin_aarch64_sqdmlal2_laneqv4si (__a
, __b
, __c
, __d
);
21518 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21519 vqdmlal_high_n_s32 (int64x2_t __a
, int32x4_t __b
, int32_t __c
)
21521 return __builtin_aarch64_sqdmlal2_nv4si (__a
, __b
, __c
);
21524 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21525 vqdmlal_lane_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
, int const __d
)
21527 int32x4_t __tmp
= vcombine_s32 (__c
, vcreate_s32 (INT64_C (0)));
21528 return __builtin_aarch64_sqdmlal_lanev2si (__a
, __b
, __tmp
, __d
);
21531 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21532 vqdmlal_laneq_s32 (int64x2_t __a
, int32x2_t __b
, int32x4_t __c
, int const __d
)
21534 return __builtin_aarch64_sqdmlal_laneqv2si (__a
, __b
, __c
, __d
);
21537 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21538 vqdmlal_n_s32 (int64x2_t __a
, int32x2_t __b
, int32_t __c
)
21540 return __builtin_aarch64_sqdmlal_nv2si (__a
, __b
, __c
);
21543 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21544 vqdmlalh_s16 (int32x1_t __a
, int16x1_t __b
, int16x1_t __c
)
21546 return __builtin_aarch64_sqdmlalhi (__a
, __b
, __c
);
21549 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21550 vqdmlalh_lane_s16 (int32x1_t __a
, int16x1_t __b
, int16x8_t __c
, const int __d
)
21552 return __builtin_aarch64_sqdmlal_lanehi (__a
, __b
, __c
, __d
);
21555 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21556 vqdmlals_s32 (int64x1_t __a
, int32x1_t __b
, int32x1_t __c
)
21558 return __builtin_aarch64_sqdmlalsi (__a
, __b
, __c
);
21561 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21562 vqdmlals_lane_s32 (int64x1_t __a
, int32x1_t __b
, int32x4_t __c
, const int __d
)
21564 return __builtin_aarch64_sqdmlal_lanesi (__a
, __b
, __c
, __d
);
21569 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21570 vqdmlsl_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
)
21572 return __builtin_aarch64_sqdmlslv4hi (__a
, __b
, __c
);
21575 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21576 vqdmlsl_high_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
)
21578 return __builtin_aarch64_sqdmlsl2v8hi (__a
, __b
, __c
);
21581 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21582 vqdmlsl_high_lane_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21585 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a
, __b
, __c
, __d
);
21588 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21589 vqdmlsl_high_laneq_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21592 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a
, __b
, __c
, __d
);
21595 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21596 vqdmlsl_high_n_s16 (int32x4_t __a
, int16x8_t __b
, int16_t __c
)
21598 return __builtin_aarch64_sqdmlsl2_nv8hi (__a
, __b
, __c
);
21601 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21602 vqdmlsl_lane_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
, int const __d
)
21604 int16x8_t __tmp
= vcombine_s16 (__c
, vcreate_s16 (INT64_C (0)));
21605 return __builtin_aarch64_sqdmlsl_lanev4hi (__a
, __b
, __tmp
, __d
);
21608 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21609 vqdmlsl_laneq_s16 (int32x4_t __a
, int16x4_t __b
, int16x8_t __c
, int const __d
)
21611 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a
, __b
, __c
, __d
);
21614 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21615 vqdmlsl_n_s16 (int32x4_t __a
, int16x4_t __b
, int16_t __c
)
21617 return __builtin_aarch64_sqdmlsl_nv4hi (__a
, __b
, __c
);
21620 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21621 vqdmlsl_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
)
21623 return __builtin_aarch64_sqdmlslv2si (__a
, __b
, __c
);
21626 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21627 vqdmlsl_high_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
)
21629 return __builtin_aarch64_sqdmlsl2v4si (__a
, __b
, __c
);
21632 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21633 vqdmlsl_high_lane_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21636 return __builtin_aarch64_sqdmlsl2_lanev4si (__a
, __b
, __c
, __d
);
21639 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21640 vqdmlsl_high_laneq_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21643 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a
, __b
, __c
, __d
);
21646 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21647 vqdmlsl_high_n_s32 (int64x2_t __a
, int32x4_t __b
, int32_t __c
)
21649 return __builtin_aarch64_sqdmlsl2_nv4si (__a
, __b
, __c
);
21652 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21653 vqdmlsl_lane_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
, int const __d
)
21655 int32x4_t __tmp
= vcombine_s32 (__c
, vcreate_s32 (INT64_C (0)));
21656 return __builtin_aarch64_sqdmlsl_lanev2si (__a
, __b
, __tmp
, __d
);
21659 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21660 vqdmlsl_laneq_s32 (int64x2_t __a
, int32x2_t __b
, int32x4_t __c
, int const __d
)
21662 return __builtin_aarch64_sqdmlsl_laneqv2si (__a
, __b
, __c
, __d
);
21665 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21666 vqdmlsl_n_s32 (int64x2_t __a
, int32x2_t __b
, int32_t __c
)
21668 return __builtin_aarch64_sqdmlsl_nv2si (__a
, __b
, __c
);
21671 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21672 vqdmlslh_s16 (int32x1_t __a
, int16x1_t __b
, int16x1_t __c
)
21674 return __builtin_aarch64_sqdmlslhi (__a
, __b
, __c
);
21677 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21678 vqdmlslh_lane_s16 (int32x1_t __a
, int16x1_t __b
, int16x8_t __c
, const int __d
)
21680 return __builtin_aarch64_sqdmlsl_lanehi (__a
, __b
, __c
, __d
);
21683 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21684 vqdmlsls_s32 (int64x1_t __a
, int32x1_t __b
, int32x1_t __c
)
21686 return __builtin_aarch64_sqdmlslsi (__a
, __b
, __c
);
21689 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21690 vqdmlsls_lane_s32 (int64x1_t __a
, int32x1_t __b
, int32x4_t __c
, const int __d
)
21692 return __builtin_aarch64_sqdmlsl_lanesi (__a
, __b
, __c
, __d
);
21697 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21698 vqdmulh_lane_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
21700 return __builtin_aarch64_sqdmulh_lanev4hi (__a
, __b
, __c
);
21703 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21704 vqdmulh_lane_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
21706 return __builtin_aarch64_sqdmulh_lanev2si (__a
, __b
, __c
);
21709 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
21710 vqdmulhq_lane_s16 (int16x8_t __a
, int16x4_t __b
, const int __c
)
21712 return __builtin_aarch64_sqdmulh_lanev8hi (__a
, __b
, __c
);
21715 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21716 vqdmulhq_lane_s32 (int32x4_t __a
, int32x2_t __b
, const int __c
)
21718 return __builtin_aarch64_sqdmulh_lanev4si (__a
, __b
, __c
);
21721 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21722 vqdmulhh_s16 (int16x1_t __a
, int16x1_t __b
)
21724 return (int16x1_t
) __builtin_aarch64_sqdmulhhi (__a
, __b
);
21727 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21728 vqdmulhh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
21730 return __builtin_aarch64_sqdmulh_lanehi (__a
, __b
, __c
);
21733 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21734 vqdmulhs_s32 (int32x1_t __a
, int32x1_t __b
)
21736 return (int32x1_t
) __builtin_aarch64_sqdmulhsi (__a
, __b
);
21739 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21740 vqdmulhs_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
21742 return __builtin_aarch64_sqdmulh_lanesi (__a
, __b
, __c
);
21747 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21748 vqdmull_s16 (int16x4_t __a
, int16x4_t __b
)
21750 return __builtin_aarch64_sqdmullv4hi (__a
, __b
);
21753 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21754 vqdmull_high_s16 (int16x8_t __a
, int16x8_t __b
)
21756 return __builtin_aarch64_sqdmull2v8hi (__a
, __b
);
21759 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21760 vqdmull_high_lane_s16 (int16x8_t __a
, int16x8_t __b
, int const __c
)
21762 return __builtin_aarch64_sqdmull2_lanev8hi (__a
, __b
,__c
);
21765 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21766 vqdmull_high_laneq_s16 (int16x8_t __a
, int16x8_t __b
, int const __c
)
21768 return __builtin_aarch64_sqdmull2_laneqv8hi (__a
, __b
,__c
);
21771 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21772 vqdmull_high_n_s16 (int16x8_t __a
, int16_t __b
)
21774 return __builtin_aarch64_sqdmull2_nv8hi (__a
, __b
);
21777 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21778 vqdmull_lane_s16 (int16x4_t __a
, int16x4_t __b
, int const __c
)
21780 int16x8_t __tmp
= vcombine_s16 (__b
, vcreate_s16 (INT64_C (0)));
21781 return __builtin_aarch64_sqdmull_lanev4hi (__a
, __tmp
, __c
);
21784 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21785 vqdmull_laneq_s16 (int16x4_t __a
, int16x8_t __b
, int const __c
)
21787 return __builtin_aarch64_sqdmull_laneqv4hi (__a
, __b
, __c
);
21790 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21791 vqdmull_n_s16 (int16x4_t __a
, int16_t __b
)
21793 return __builtin_aarch64_sqdmull_nv4hi (__a
, __b
);
21796 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21797 vqdmull_s32 (int32x2_t __a
, int32x2_t __b
)
21799 return __builtin_aarch64_sqdmullv2si (__a
, __b
);
21802 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21803 vqdmull_high_s32 (int32x4_t __a
, int32x4_t __b
)
21805 return __builtin_aarch64_sqdmull2v4si (__a
, __b
);
21808 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21809 vqdmull_high_lane_s32 (int32x4_t __a
, int32x4_t __b
, int const __c
)
21811 return __builtin_aarch64_sqdmull2_lanev4si (__a
, __b
, __c
);
21814 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21815 vqdmull_high_laneq_s32 (int32x4_t __a
, int32x4_t __b
, int const __c
)
21817 return __builtin_aarch64_sqdmull2_laneqv4si (__a
, __b
, __c
);
21820 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21821 vqdmull_high_n_s32 (int32x4_t __a
, int32_t __b
)
21823 return __builtin_aarch64_sqdmull2_nv4si (__a
, __b
);
21826 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21827 vqdmull_lane_s32 (int32x2_t __a
, int32x2_t __b
, int const __c
)
21829 int32x4_t __tmp
= vcombine_s32 (__b
, vcreate_s32 (INT64_C (0)));
21830 return __builtin_aarch64_sqdmull_lanev2si (__a
, __tmp
, __c
);
21833 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21834 vqdmull_laneq_s32 (int32x2_t __a
, int32x4_t __b
, int const __c
)
21836 return __builtin_aarch64_sqdmull_laneqv2si (__a
, __b
, __c
);
21839 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21840 vqdmull_n_s32 (int32x2_t __a
, int32_t __b
)
21842 return __builtin_aarch64_sqdmull_nv2si (__a
, __b
);
21845 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21846 vqdmullh_s16 (int16x1_t __a
, int16x1_t __b
)
21848 return (int32x1_t
) __builtin_aarch64_sqdmullhi (__a
, __b
);
21851 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21852 vqdmullh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
21854 return __builtin_aarch64_sqdmull_lanehi (__a
, __b
, __c
);
21857 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21858 vqdmulls_s32 (int32x1_t __a
, int32x1_t __b
)
21860 return (int64x1_t
) __builtin_aarch64_sqdmullsi (__a
, __b
);
21863 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21864 vqdmulls_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
21866 return __builtin_aarch64_sqdmull_lanesi (__a
, __b
, __c
);
21871 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
21872 vqmovn_s16 (int16x8_t __a
)
21874 return (int8x8_t
) __builtin_aarch64_sqmovnv8hi (__a
);
21877 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21878 vqmovn_s32 (int32x4_t __a
)
21880 return (int16x4_t
) __builtin_aarch64_sqmovnv4si (__a
);
21883 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21884 vqmovn_s64 (int64x2_t __a
)
21886 return (int32x2_t
) __builtin_aarch64_sqmovnv2di (__a
);
21889 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
21890 vqmovn_u16 (uint16x8_t __a
)
21892 return (uint8x8_t
) __builtin_aarch64_uqmovnv8hi ((int16x8_t
) __a
);
21895 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
21896 vqmovn_u32 (uint32x4_t __a
)
21898 return (uint16x4_t
) __builtin_aarch64_uqmovnv4si ((int32x4_t
) __a
);
21901 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
21902 vqmovn_u64 (uint64x2_t __a
)
21904 return (uint32x2_t
) __builtin_aarch64_uqmovnv2di ((int64x2_t
) __a
);
21907 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21908 vqmovnh_s16 (int16x1_t __a
)
21910 return (int8x1_t
) __builtin_aarch64_sqmovnhi (__a
);
21913 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21914 vqmovns_s32 (int32x1_t __a
)
21916 return (int16x1_t
) __builtin_aarch64_sqmovnsi (__a
);
21919 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21920 vqmovnd_s64 (int64x1_t __a
)
21922 return (int32x1_t
) __builtin_aarch64_sqmovndi (__a
);
21925 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
21926 vqmovnh_u16 (uint16x1_t __a
)
21928 return (uint8x1_t
) __builtin_aarch64_uqmovnhi (__a
);
21931 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
21932 vqmovns_u32 (uint32x1_t __a
)
21934 return (uint16x1_t
) __builtin_aarch64_uqmovnsi (__a
);
21937 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
21938 vqmovnd_u64 (uint64x1_t __a
)
21940 return (uint32x1_t
) __builtin_aarch64_uqmovndi (__a
);
21945 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
21946 vqmovun_s16 (int16x8_t __a
)
21948 return (uint8x8_t
) __builtin_aarch64_sqmovunv8hi (__a
);
21951 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
21952 vqmovun_s32 (int32x4_t __a
)
21954 return (uint16x4_t
) __builtin_aarch64_sqmovunv4si (__a
);
21957 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
21958 vqmovun_s64 (int64x2_t __a
)
21960 return (uint32x2_t
) __builtin_aarch64_sqmovunv2di (__a
);
21963 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21964 vqmovunh_s16 (int16x1_t __a
)
21966 return (int8x1_t
) __builtin_aarch64_sqmovunhi (__a
);
21969 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21970 vqmovuns_s32 (int32x1_t __a
)
21972 return (int16x1_t
) __builtin_aarch64_sqmovunsi (__a
);
21975 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21976 vqmovund_s64 (int64x1_t __a
)
21978 return (int32x1_t
) __builtin_aarch64_sqmovundi (__a
);
21983 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21984 vqnegq_s64 (int64x2_t __a
)
21986 return (int64x2_t
) __builtin_aarch64_sqnegv2di (__a
);
21989 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21990 vqnegb_s8 (int8x1_t __a
)
21992 return (int8x1_t
) __builtin_aarch64_sqnegqi (__a
);
21995 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21996 vqnegh_s16 (int16x1_t __a
)
21998 return (int16x1_t
) __builtin_aarch64_sqneghi (__a
);
22001 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22002 vqnegs_s32 (int32x1_t __a
)
22004 return (int32x1_t
) __builtin_aarch64_sqnegsi (__a
);
22009 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22010 vqrdmulh_lane_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
22012 return __builtin_aarch64_sqrdmulh_lanev4hi (__a
, __b
, __c
);
22015 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22016 vqrdmulh_lane_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
22018 return __builtin_aarch64_sqrdmulh_lanev2si (__a
, __b
, __c
);
22021 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22022 vqrdmulhq_lane_s16 (int16x8_t __a
, int16x4_t __b
, const int __c
)
22024 return __builtin_aarch64_sqrdmulh_lanev8hi (__a
, __b
, __c
);
22027 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22028 vqrdmulhq_lane_s32 (int32x4_t __a
, int32x2_t __b
, const int __c
)
22030 return __builtin_aarch64_sqrdmulh_lanev4si (__a
, __b
, __c
);
22033 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22034 vqrdmulhh_s16 (int16x1_t __a
, int16x1_t __b
)
22036 return (int16x1_t
) __builtin_aarch64_sqrdmulhhi (__a
, __b
);
22039 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22040 vqrdmulhh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
22042 return __builtin_aarch64_sqrdmulh_lanehi (__a
, __b
, __c
);
22045 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22046 vqrdmulhs_s32 (int32x1_t __a
, int32x1_t __b
)
22048 return (int32x1_t
) __builtin_aarch64_sqrdmulhsi (__a
, __b
);
22051 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22052 vqrdmulhs_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
22054 return __builtin_aarch64_sqrdmulh_lanesi (__a
, __b
, __c
);
22059 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22060 vqrshl_s8 (int8x8_t __a
, int8x8_t __b
)
22062 return __builtin_aarch64_sqrshlv8qi (__a
, __b
);
22065 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22066 vqrshl_s16 (int16x4_t __a
, int16x4_t __b
)
22068 return __builtin_aarch64_sqrshlv4hi (__a
, __b
);
22071 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22072 vqrshl_s32 (int32x2_t __a
, int32x2_t __b
)
22074 return __builtin_aarch64_sqrshlv2si (__a
, __b
);
22077 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22078 vqrshl_s64 (int64x1_t __a
, int64x1_t __b
)
22080 return __builtin_aarch64_sqrshldi (__a
, __b
);
22083 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22084 vqrshl_u8 (uint8x8_t __a
, int8x8_t __b
)
22086 return (uint8x8_t
) __builtin_aarch64_uqrshlv8qi ((int8x8_t
) __a
, __b
);
22089 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22090 vqrshl_u16 (uint16x4_t __a
, int16x4_t __b
)
22092 return (uint16x4_t
) __builtin_aarch64_uqrshlv4hi ((int16x4_t
) __a
, __b
);
22095 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22096 vqrshl_u32 (uint32x2_t __a
, int32x2_t __b
)
22098 return (uint32x2_t
) __builtin_aarch64_uqrshlv2si ((int32x2_t
) __a
, __b
);
22101 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22102 vqrshl_u64 (uint64x1_t __a
, int64x1_t __b
)
22104 return (uint64x1_t
) __builtin_aarch64_uqrshldi ((int64x1_t
) __a
, __b
);
22107 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22108 vqrshlq_s8 (int8x16_t __a
, int8x16_t __b
)
22110 return __builtin_aarch64_sqrshlv16qi (__a
, __b
);
22113 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22114 vqrshlq_s16 (int16x8_t __a
, int16x8_t __b
)
22116 return __builtin_aarch64_sqrshlv8hi (__a
, __b
);
22119 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22120 vqrshlq_s32 (int32x4_t __a
, int32x4_t __b
)
22122 return __builtin_aarch64_sqrshlv4si (__a
, __b
);
22125 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22126 vqrshlq_s64 (int64x2_t __a
, int64x2_t __b
)
22128 return __builtin_aarch64_sqrshlv2di (__a
, __b
);
22131 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22132 vqrshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
22134 return (uint8x16_t
) __builtin_aarch64_uqrshlv16qi ((int8x16_t
) __a
, __b
);
22137 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22138 vqrshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
22140 return (uint16x8_t
) __builtin_aarch64_uqrshlv8hi ((int16x8_t
) __a
, __b
);
22143 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22144 vqrshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
22146 return (uint32x4_t
) __builtin_aarch64_uqrshlv4si ((int32x4_t
) __a
, __b
);
22149 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22150 vqrshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
22152 return (uint64x2_t
) __builtin_aarch64_uqrshlv2di ((int64x2_t
) __a
, __b
);
22155 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22156 vqrshlb_s8 (int8x1_t __a
, int8x1_t __b
)
22158 return __builtin_aarch64_sqrshlqi (__a
, __b
);
22161 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22162 vqrshlh_s16 (int16x1_t __a
, int16x1_t __b
)
22164 return __builtin_aarch64_sqrshlhi (__a
, __b
);
22167 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22168 vqrshls_s32 (int32x1_t __a
, int32x1_t __b
)
22170 return __builtin_aarch64_sqrshlsi (__a
, __b
);
22173 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22174 vqrshld_s64 (int64x1_t __a
, int64x1_t __b
)
22176 return __builtin_aarch64_sqrshldi (__a
, __b
);
22179 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22180 vqrshlb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22182 return (uint8x1_t
) __builtin_aarch64_uqrshlqi (__a
, __b
);
22185 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22186 vqrshlh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22188 return (uint16x1_t
) __builtin_aarch64_uqrshlhi (__a
, __b
);
22191 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22192 vqrshls_u32 (uint32x1_t __a
, uint32x1_t __b
)
22194 return (uint32x1_t
) __builtin_aarch64_uqrshlsi (__a
, __b
);
22197 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22198 vqrshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
22200 return (uint64x1_t
) __builtin_aarch64_uqrshldi (__a
, __b
);
22205 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22206 vqrshrn_n_s16 (int16x8_t __a
, const int __b
)
22208 return (int8x8_t
) __builtin_aarch64_sqrshrn_nv8hi (__a
, __b
);
22211 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22212 vqrshrn_n_s32 (int32x4_t __a
, const int __b
)
22214 return (int16x4_t
) __builtin_aarch64_sqrshrn_nv4si (__a
, __b
);
22217 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22218 vqrshrn_n_s64 (int64x2_t __a
, const int __b
)
22220 return (int32x2_t
) __builtin_aarch64_sqrshrn_nv2di (__a
, __b
);
22223 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22224 vqrshrn_n_u16 (uint16x8_t __a
, const int __b
)
22226 return (uint8x8_t
) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t
) __a
, __b
);
22229 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22230 vqrshrn_n_u32 (uint32x4_t __a
, const int __b
)
22232 return (uint16x4_t
) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t
) __a
, __b
);
22235 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22236 vqrshrn_n_u64 (uint64x2_t __a
, const int __b
)
22238 return (uint32x2_t
) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t
) __a
, __b
);
22241 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22242 vqrshrnh_n_s16 (int16x1_t __a
, const int __b
)
22244 return (int8x1_t
) __builtin_aarch64_sqrshrn_nhi (__a
, __b
);
22247 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22248 vqrshrns_n_s32 (int32x1_t __a
, const int __b
)
22250 return (int16x1_t
) __builtin_aarch64_sqrshrn_nsi (__a
, __b
);
22253 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22254 vqrshrnd_n_s64 (int64x1_t __a
, const int __b
)
22256 return (int32x1_t
) __builtin_aarch64_sqrshrn_ndi (__a
, __b
);
22259 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22260 vqrshrnh_n_u16 (uint16x1_t __a
, const int __b
)
22262 return (uint8x1_t
) __builtin_aarch64_uqrshrn_nhi (__a
, __b
);
22265 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22266 vqrshrns_n_u32 (uint32x1_t __a
, const int __b
)
22268 return (uint16x1_t
) __builtin_aarch64_uqrshrn_nsi (__a
, __b
);
22271 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22272 vqrshrnd_n_u64 (uint64x1_t __a
, const int __b
)
22274 return (uint32x1_t
) __builtin_aarch64_uqrshrn_ndi (__a
, __b
);
22279 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22280 vqrshrun_n_s16 (int16x8_t __a
, const int __b
)
22282 return (uint8x8_t
) __builtin_aarch64_sqrshrun_nv8hi (__a
, __b
);
22285 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22286 vqrshrun_n_s32 (int32x4_t __a
, const int __b
)
22288 return (uint16x4_t
) __builtin_aarch64_sqrshrun_nv4si (__a
, __b
);
22291 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22292 vqrshrun_n_s64 (int64x2_t __a
, const int __b
)
22294 return (uint32x2_t
) __builtin_aarch64_sqrshrun_nv2di (__a
, __b
);
22297 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22298 vqrshrunh_n_s16 (int16x1_t __a
, const int __b
)
22300 return (int8x1_t
) __builtin_aarch64_sqrshrun_nhi (__a
, __b
);
22303 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22304 vqrshruns_n_s32 (int32x1_t __a
, const int __b
)
22306 return (int16x1_t
) __builtin_aarch64_sqrshrun_nsi (__a
, __b
);
22309 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22310 vqrshrund_n_s64 (int64x1_t __a
, const int __b
)
22312 return (int32x1_t
) __builtin_aarch64_sqrshrun_ndi (__a
, __b
);
22317 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22318 vqshl_s8 (int8x8_t __a
, int8x8_t __b
)
22320 return __builtin_aarch64_sqshlv8qi (__a
, __b
);
22323 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22324 vqshl_s16 (int16x4_t __a
, int16x4_t __b
)
22326 return __builtin_aarch64_sqshlv4hi (__a
, __b
);
22329 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22330 vqshl_s32 (int32x2_t __a
, int32x2_t __b
)
22332 return __builtin_aarch64_sqshlv2si (__a
, __b
);
22335 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22336 vqshl_s64 (int64x1_t __a
, int64x1_t __b
)
22338 return __builtin_aarch64_sqshldi (__a
, __b
);
22341 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22342 vqshl_u8 (uint8x8_t __a
, int8x8_t __b
)
22344 return (uint8x8_t
) __builtin_aarch64_uqshlv8qi ((int8x8_t
) __a
, __b
);
22347 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22348 vqshl_u16 (uint16x4_t __a
, int16x4_t __b
)
22350 return (uint16x4_t
) __builtin_aarch64_uqshlv4hi ((int16x4_t
) __a
, __b
);
22353 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22354 vqshl_u32 (uint32x2_t __a
, int32x2_t __b
)
22356 return (uint32x2_t
) __builtin_aarch64_uqshlv2si ((int32x2_t
) __a
, __b
);
22359 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22360 vqshl_u64 (uint64x1_t __a
, int64x1_t __b
)
22362 return (uint64x1_t
) __builtin_aarch64_uqshldi ((int64x1_t
) __a
, __b
);
22365 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22366 vqshlq_s8 (int8x16_t __a
, int8x16_t __b
)
22368 return __builtin_aarch64_sqshlv16qi (__a
, __b
);
22371 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22372 vqshlq_s16 (int16x8_t __a
, int16x8_t __b
)
22374 return __builtin_aarch64_sqshlv8hi (__a
, __b
);
22377 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22378 vqshlq_s32 (int32x4_t __a
, int32x4_t __b
)
22380 return __builtin_aarch64_sqshlv4si (__a
, __b
);
22383 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22384 vqshlq_s64 (int64x2_t __a
, int64x2_t __b
)
22386 return __builtin_aarch64_sqshlv2di (__a
, __b
);
22389 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22390 vqshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
22392 return (uint8x16_t
) __builtin_aarch64_uqshlv16qi ((int8x16_t
) __a
, __b
);
22395 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22396 vqshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
22398 return (uint16x8_t
) __builtin_aarch64_uqshlv8hi ((int16x8_t
) __a
, __b
);
22401 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22402 vqshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
22404 return (uint32x4_t
) __builtin_aarch64_uqshlv4si ((int32x4_t
) __a
, __b
);
22407 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22408 vqshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
22410 return (uint64x2_t
) __builtin_aarch64_uqshlv2di ((int64x2_t
) __a
, __b
);
22413 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22414 vqshlb_s8 (int8x1_t __a
, int8x1_t __b
)
22416 return __builtin_aarch64_sqshlqi (__a
, __b
);
22419 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22420 vqshlh_s16 (int16x1_t __a
, int16x1_t __b
)
22422 return __builtin_aarch64_sqshlhi (__a
, __b
);
22425 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22426 vqshls_s32 (int32x1_t __a
, int32x1_t __b
)
22428 return __builtin_aarch64_sqshlsi (__a
, __b
);
22431 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22432 vqshld_s64 (int64x1_t __a
, int64x1_t __b
)
22434 return __builtin_aarch64_sqshldi (__a
, __b
);
22437 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22438 vqshlb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22440 return (uint8x1_t
) __builtin_aarch64_uqshlqi (__a
, __b
);
22443 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22444 vqshlh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22446 return (uint16x1_t
) __builtin_aarch64_uqshlhi (__a
, __b
);
22449 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22450 vqshls_u32 (uint32x1_t __a
, uint32x1_t __b
)
22452 return (uint32x1_t
) __builtin_aarch64_uqshlsi (__a
, __b
);
22455 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22456 vqshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
22458 return (uint64x1_t
) __builtin_aarch64_uqshldi (__a
, __b
);
22461 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22462 vqshl_n_s8 (int8x8_t __a
, const int __b
)
22464 return (int8x8_t
) __builtin_aarch64_sqshl_nv8qi (__a
, __b
);
22467 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22468 vqshl_n_s16 (int16x4_t __a
, const int __b
)
22470 return (int16x4_t
) __builtin_aarch64_sqshl_nv4hi (__a
, __b
);
22473 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22474 vqshl_n_s32 (int32x2_t __a
, const int __b
)
22476 return (int32x2_t
) __builtin_aarch64_sqshl_nv2si (__a
, __b
);
22479 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22480 vqshl_n_s64 (int64x1_t __a
, const int __b
)
22482 return (int64x1_t
) __builtin_aarch64_sqshl_ndi (__a
, __b
);
22485 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22486 vqshl_n_u8 (uint8x8_t __a
, const int __b
)
22488 return (uint8x8_t
) __builtin_aarch64_uqshl_nv8qi ((int8x8_t
) __a
, __b
);
22491 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22492 vqshl_n_u16 (uint16x4_t __a
, const int __b
)
22494 return (uint16x4_t
) __builtin_aarch64_uqshl_nv4hi ((int16x4_t
) __a
, __b
);
22497 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22498 vqshl_n_u32 (uint32x2_t __a
, const int __b
)
22500 return (uint32x2_t
) __builtin_aarch64_uqshl_nv2si ((int32x2_t
) __a
, __b
);
22503 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22504 vqshl_n_u64 (uint64x1_t __a
, const int __b
)
22506 return (uint64x1_t
) __builtin_aarch64_uqshl_ndi ((int64x1_t
) __a
, __b
);
22509 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22510 vqshlq_n_s8 (int8x16_t __a
, const int __b
)
22512 return (int8x16_t
) __builtin_aarch64_sqshl_nv16qi (__a
, __b
);
22515 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22516 vqshlq_n_s16 (int16x8_t __a
, const int __b
)
22518 return (int16x8_t
) __builtin_aarch64_sqshl_nv8hi (__a
, __b
);
22521 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22522 vqshlq_n_s32 (int32x4_t __a
, const int __b
)
22524 return (int32x4_t
) __builtin_aarch64_sqshl_nv4si (__a
, __b
);
22527 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22528 vqshlq_n_s64 (int64x2_t __a
, const int __b
)
22530 return (int64x2_t
) __builtin_aarch64_sqshl_nv2di (__a
, __b
);
22533 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22534 vqshlq_n_u8 (uint8x16_t __a
, const int __b
)
22536 return (uint8x16_t
) __builtin_aarch64_uqshl_nv16qi ((int8x16_t
) __a
, __b
);
22539 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22540 vqshlq_n_u16 (uint16x8_t __a
, const int __b
)
22542 return (uint16x8_t
) __builtin_aarch64_uqshl_nv8hi ((int16x8_t
) __a
, __b
);
22545 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22546 vqshlq_n_u32 (uint32x4_t __a
, const int __b
)
22548 return (uint32x4_t
) __builtin_aarch64_uqshl_nv4si ((int32x4_t
) __a
, __b
);
22551 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22552 vqshlq_n_u64 (uint64x2_t __a
, const int __b
)
22554 return (uint64x2_t
) __builtin_aarch64_uqshl_nv2di ((int64x2_t
) __a
, __b
);
22557 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22558 vqshlb_n_s8 (int8x1_t __a
, const int __b
)
22560 return (int8x1_t
) __builtin_aarch64_sqshl_nqi (__a
, __b
);
22563 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22564 vqshlh_n_s16 (int16x1_t __a
, const int __b
)
22566 return (int16x1_t
) __builtin_aarch64_sqshl_nhi (__a
, __b
);
22569 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22570 vqshls_n_s32 (int32x1_t __a
, const int __b
)
22572 return (int32x1_t
) __builtin_aarch64_sqshl_nsi (__a
, __b
);
22575 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22576 vqshld_n_s64 (int64x1_t __a
, const int __b
)
22578 return (int64x1_t
) __builtin_aarch64_sqshl_ndi (__a
, __b
);
22581 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22582 vqshlb_n_u8 (uint8x1_t __a
, const int __b
)
22584 return (uint8x1_t
) __builtin_aarch64_uqshl_nqi (__a
, __b
);
22587 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22588 vqshlh_n_u16 (uint16x1_t __a
, const int __b
)
22590 return (uint16x1_t
) __builtin_aarch64_uqshl_nhi (__a
, __b
);
22593 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22594 vqshls_n_u32 (uint32x1_t __a
, const int __b
)
22596 return (uint32x1_t
) __builtin_aarch64_uqshl_nsi (__a
, __b
);
22599 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22600 vqshld_n_u64 (uint64x1_t __a
, const int __b
)
22602 return (uint64x1_t
) __builtin_aarch64_uqshl_ndi (__a
, __b
);
22607 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22608 vqshlu_n_s8 (int8x8_t __a
, const int __b
)
22610 return (uint8x8_t
) __builtin_aarch64_sqshlu_nv8qi (__a
, __b
);
22613 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22614 vqshlu_n_s16 (int16x4_t __a
, const int __b
)
22616 return (uint16x4_t
) __builtin_aarch64_sqshlu_nv4hi (__a
, __b
);
22619 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22620 vqshlu_n_s32 (int32x2_t __a
, const int __b
)
22622 return (uint32x2_t
) __builtin_aarch64_sqshlu_nv2si (__a
, __b
);
22625 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22626 vqshlu_n_s64 (int64x1_t __a
, const int __b
)
22628 return (uint64x1_t
) __builtin_aarch64_sqshlu_ndi (__a
, __b
);
22631 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22632 vqshluq_n_s8 (int8x16_t __a
, const int __b
)
22634 return (uint8x16_t
) __builtin_aarch64_sqshlu_nv16qi (__a
, __b
);
22637 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22638 vqshluq_n_s16 (int16x8_t __a
, const int __b
)
22640 return (uint16x8_t
) __builtin_aarch64_sqshlu_nv8hi (__a
, __b
);
22643 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22644 vqshluq_n_s32 (int32x4_t __a
, const int __b
)
22646 return (uint32x4_t
) __builtin_aarch64_sqshlu_nv4si (__a
, __b
);
22649 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22650 vqshluq_n_s64 (int64x2_t __a
, const int __b
)
22652 return (uint64x2_t
) __builtin_aarch64_sqshlu_nv2di (__a
, __b
);
22655 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22656 vqshlub_n_s8 (int8x1_t __a
, const int __b
)
22658 return (int8x1_t
) __builtin_aarch64_sqshlu_nqi (__a
, __b
);
22661 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22662 vqshluh_n_s16 (int16x1_t __a
, const int __b
)
22664 return (int16x1_t
) __builtin_aarch64_sqshlu_nhi (__a
, __b
);
22667 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22668 vqshlus_n_s32 (int32x1_t __a
, const int __b
)
22670 return (int32x1_t
) __builtin_aarch64_sqshlu_nsi (__a
, __b
);
22673 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22674 vqshlud_n_s64 (int64x1_t __a
, const int __b
)
22676 return (int64x1_t
) __builtin_aarch64_sqshlu_ndi (__a
, __b
);
22681 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22682 vqshrn_n_s16 (int16x8_t __a
, const int __b
)
22684 return (int8x8_t
) __builtin_aarch64_sqshrn_nv8hi (__a
, __b
);
22687 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22688 vqshrn_n_s32 (int32x4_t __a
, const int __b
)
22690 return (int16x4_t
) __builtin_aarch64_sqshrn_nv4si (__a
, __b
);
22693 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22694 vqshrn_n_s64 (int64x2_t __a
, const int __b
)
22696 return (int32x2_t
) __builtin_aarch64_sqshrn_nv2di (__a
, __b
);
22699 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22700 vqshrn_n_u16 (uint16x8_t __a
, const int __b
)
22702 return (uint8x8_t
) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t
) __a
, __b
);
22705 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22706 vqshrn_n_u32 (uint32x4_t __a
, const int __b
)
22708 return (uint16x4_t
) __builtin_aarch64_uqshrn_nv4si ((int32x4_t
) __a
, __b
);
22711 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22712 vqshrn_n_u64 (uint64x2_t __a
, const int __b
)
22714 return (uint32x2_t
) __builtin_aarch64_uqshrn_nv2di ((int64x2_t
) __a
, __b
);
22717 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22718 vqshrnh_n_s16 (int16x1_t __a
, const int __b
)
22720 return (int8x1_t
) __builtin_aarch64_sqshrn_nhi (__a
, __b
);
22723 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22724 vqshrns_n_s32 (int32x1_t __a
, const int __b
)
22726 return (int16x1_t
) __builtin_aarch64_sqshrn_nsi (__a
, __b
);
22729 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22730 vqshrnd_n_s64 (int64x1_t __a
, const int __b
)
22732 return (int32x1_t
) __builtin_aarch64_sqshrn_ndi (__a
, __b
);
22735 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22736 vqshrnh_n_u16 (uint16x1_t __a
, const int __b
)
22738 return (uint8x1_t
) __builtin_aarch64_uqshrn_nhi (__a
, __b
);
22741 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22742 vqshrns_n_u32 (uint32x1_t __a
, const int __b
)
22744 return (uint16x1_t
) __builtin_aarch64_uqshrn_nsi (__a
, __b
);
22747 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22748 vqshrnd_n_u64 (uint64x1_t __a
, const int __b
)
22750 return (uint32x1_t
) __builtin_aarch64_uqshrn_ndi (__a
, __b
);
22755 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22756 vqshrun_n_s16 (int16x8_t __a
, const int __b
)
22758 return (uint8x8_t
) __builtin_aarch64_sqshrun_nv8hi (__a
, __b
);
22761 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22762 vqshrun_n_s32 (int32x4_t __a
, const int __b
)
22764 return (uint16x4_t
) __builtin_aarch64_sqshrun_nv4si (__a
, __b
);
22767 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22768 vqshrun_n_s64 (int64x2_t __a
, const int __b
)
22770 return (uint32x2_t
) __builtin_aarch64_sqshrun_nv2di (__a
, __b
);
22773 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22774 vqshrunh_n_s16 (int16x1_t __a
, const int __b
)
22776 return (int8x1_t
) __builtin_aarch64_sqshrun_nhi (__a
, __b
);
22779 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22780 vqshruns_n_s32 (int32x1_t __a
, const int __b
)
22782 return (int16x1_t
) __builtin_aarch64_sqshrun_nsi (__a
, __b
);
22785 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22786 vqshrund_n_s64 (int64x1_t __a
, const int __b
)
22788 return (int32x1_t
) __builtin_aarch64_sqshrun_ndi (__a
, __b
);
22793 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22794 vqsubb_s8 (int8x1_t __a
, int8x1_t __b
)
22796 return (int8x1_t
) __builtin_aarch64_sqsubqi (__a
, __b
);
22799 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22800 vqsubh_s16 (int16x1_t __a
, int16x1_t __b
)
22802 return (int16x1_t
) __builtin_aarch64_sqsubhi (__a
, __b
);
22805 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22806 vqsubs_s32 (int32x1_t __a
, int32x1_t __b
)
22808 return (int32x1_t
) __builtin_aarch64_sqsubsi (__a
, __b
);
22811 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22812 vqsubd_s64 (int64x1_t __a
, int64x1_t __b
)
22814 return (int64x1_t
) __builtin_aarch64_sqsubdi (__a
, __b
);
22817 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22818 vqsubb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22820 return (uint8x1_t
) __builtin_aarch64_uqsubqi (__a
, __b
);
22823 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22824 vqsubh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22826 return (uint16x1_t
) __builtin_aarch64_uqsubhi (__a
, __b
);
22829 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22830 vqsubs_u32 (uint32x1_t __a
, uint32x1_t __b
)
22832 return (uint32x1_t
) __builtin_aarch64_uqsubsi (__a
, __b
);
22835 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22836 vqsubd_u64 (uint64x1_t __a
, uint64x1_t __b
)
22838 return (uint64x1_t
) __builtin_aarch64_uqsubdi (__a
, __b
);
22843 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
22844 vrecpes_f32 (float32_t __a
)
22846 return __builtin_aarch64_frecpesf (__a
);
22849 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
22850 vrecped_f64 (float64_t __a
)
22852 return __builtin_aarch64_frecpedf (__a
);
22855 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22856 vrecpe_f32 (float32x2_t __a
)
22858 return __builtin_aarch64_frecpev2sf (__a
);
22861 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22862 vrecpeq_f32 (float32x4_t __a
)
22864 return __builtin_aarch64_frecpev4sf (__a
);
22867 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22868 vrecpeq_f64 (float64x2_t __a
)
22870 return __builtin_aarch64_frecpev2df (__a
);
22875 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
22876 vrecpss_f32 (float32_t __a
, float32_t __b
)
22878 return __builtin_aarch64_frecpssf (__a
, __b
);
22881 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
22882 vrecpsd_f64 (float64_t __a
, float64_t __b
)
22884 return __builtin_aarch64_frecpsdf (__a
, __b
);
22887 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22888 vrecps_f32 (float32x2_t __a
, float32x2_t __b
)
22890 return __builtin_aarch64_frecpsv2sf (__a
, __b
);
22893 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22894 vrecpsq_f32 (float32x4_t __a
, float32x4_t __b
)
22896 return __builtin_aarch64_frecpsv4sf (__a
, __b
);
22899 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22900 vrecpsq_f64 (float64x2_t __a
, float64x2_t __b
)
22902 return __builtin_aarch64_frecpsv2df (__a
, __b
);
22907 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
22908 vrecpxs_f32 (float32_t __a
)
22910 return __builtin_aarch64_frecpxsf (__a
);
22913 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
22914 vrecpxd_f64 (float64_t __a
)
22916 return __builtin_aarch64_frecpxdf (__a
);
22921 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22922 vrnd_f32 (float32x2_t __a
)
22924 return __builtin_aarch64_btruncv2sf (__a
);
22927 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22928 vrndq_f32 (float32x4_t __a
)
22930 return __builtin_aarch64_btruncv4sf (__a
);
22933 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22934 vrndq_f64 (float64x2_t __a
)
22936 return __builtin_aarch64_btruncv2df (__a
);
22941 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22942 vrnda_f32 (float32x2_t __a
)
22944 return __builtin_aarch64_roundv2sf (__a
);
22947 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22948 vrndaq_f32 (float32x4_t __a
)
22950 return __builtin_aarch64_roundv4sf (__a
);
22953 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22954 vrndaq_f64 (float64x2_t __a
)
22956 return __builtin_aarch64_roundv2df (__a
);
22961 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22962 vrndi_f32 (float32x2_t __a
)
22964 return __builtin_aarch64_nearbyintv2sf (__a
);
22967 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22968 vrndiq_f32 (float32x4_t __a
)
22970 return __builtin_aarch64_nearbyintv4sf (__a
);
22973 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22974 vrndiq_f64 (float64x2_t __a
)
22976 return __builtin_aarch64_nearbyintv2df (__a
);
22981 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
22982 vrndm_f32 (float32x2_t __a
)
22984 return __builtin_aarch64_floorv2sf (__a
);
22987 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
22988 vrndmq_f32 (float32x4_t __a
)
22990 return __builtin_aarch64_floorv4sf (__a
);
22993 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
22994 vrndmq_f64 (float64x2_t __a
)
22996 return __builtin_aarch64_floorv2df (__a
);
23001 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23002 vrndn_f32 (float32x2_t __a
)
23004 return __builtin_aarch64_frintnv2sf (__a
);
23006 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
23007 vrndnq_f32 (float32x4_t __a
)
23009 return __builtin_aarch64_frintnv4sf (__a
);
23012 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
23013 vrndnq_f64 (float64x2_t __a
)
23015 return __builtin_aarch64_frintnv2df (__a
);
23020 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23021 vrndp_f32 (float32x2_t __a
)
23023 return __builtin_aarch64_ceilv2sf (__a
);
23026 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
23027 vrndpq_f32 (float32x4_t __a
)
23029 return __builtin_aarch64_ceilv4sf (__a
);
23032 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
23033 vrndpq_f64 (float64x2_t __a
)
23035 return __builtin_aarch64_ceilv2df (__a
);
23040 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23041 vrndx_f32 (float32x2_t __a
)
23043 return __builtin_aarch64_rintv2sf (__a
);
23046 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
23047 vrndxq_f32 (float32x4_t __a
)
23049 return __builtin_aarch64_rintv4sf (__a
);
23052 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
23053 vrndxq_f64 (float64x2_t __a
)
23055 return __builtin_aarch64_rintv2df (__a
);
23060 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23061 vrshl_s8 (int8x8_t __a
, int8x8_t __b
)
23063 return (int8x8_t
) __builtin_aarch64_srshlv8qi (__a
, __b
);
23066 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23067 vrshl_s16 (int16x4_t __a
, int16x4_t __b
)
23069 return (int16x4_t
) __builtin_aarch64_srshlv4hi (__a
, __b
);
23072 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23073 vrshl_s32 (int32x2_t __a
, int32x2_t __b
)
23075 return (int32x2_t
) __builtin_aarch64_srshlv2si (__a
, __b
);
23078 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23079 vrshl_s64 (int64x1_t __a
, int64x1_t __b
)
23081 return (int64x1_t
) __builtin_aarch64_srshldi (__a
, __b
);
23084 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23085 vrshl_u8 (uint8x8_t __a
, int8x8_t __b
)
23087 return (uint8x8_t
) __builtin_aarch64_urshlv8qi ((int8x8_t
) __a
, __b
);
23090 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23091 vrshl_u16 (uint16x4_t __a
, int16x4_t __b
)
23093 return (uint16x4_t
) __builtin_aarch64_urshlv4hi ((int16x4_t
) __a
, __b
);
23096 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23097 vrshl_u32 (uint32x2_t __a
, int32x2_t __b
)
23099 return (uint32x2_t
) __builtin_aarch64_urshlv2si ((int32x2_t
) __a
, __b
);
23102 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23103 vrshl_u64 (uint64x1_t __a
, int64x1_t __b
)
23105 return (uint64x1_t
) __builtin_aarch64_urshldi ((int64x1_t
) __a
, __b
);
23108 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23109 vrshlq_s8 (int8x16_t __a
, int8x16_t __b
)
23111 return (int8x16_t
) __builtin_aarch64_srshlv16qi (__a
, __b
);
23114 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23115 vrshlq_s16 (int16x8_t __a
, int16x8_t __b
)
23117 return (int16x8_t
) __builtin_aarch64_srshlv8hi (__a
, __b
);
23120 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23121 vrshlq_s32 (int32x4_t __a
, int32x4_t __b
)
23123 return (int32x4_t
) __builtin_aarch64_srshlv4si (__a
, __b
);
23126 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23127 vrshlq_s64 (int64x2_t __a
, int64x2_t __b
)
23129 return (int64x2_t
) __builtin_aarch64_srshlv2di (__a
, __b
);
23132 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23133 vrshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
23135 return (uint8x16_t
) __builtin_aarch64_urshlv16qi ((int8x16_t
) __a
, __b
);
23138 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23139 vrshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
23141 return (uint16x8_t
) __builtin_aarch64_urshlv8hi ((int16x8_t
) __a
, __b
);
23144 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23145 vrshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
23147 return (uint32x4_t
) __builtin_aarch64_urshlv4si ((int32x4_t
) __a
, __b
);
23150 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23151 vrshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
23153 return (uint64x2_t
) __builtin_aarch64_urshlv2di ((int64x2_t
) __a
, __b
);
23156 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23157 vrshld_s64 (int64x1_t __a
, int64x1_t __b
)
23159 return (int64x1_t
) __builtin_aarch64_srshldi (__a
, __b
);
23162 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23163 vrshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
23165 return (uint64x1_t
) __builtin_aarch64_urshldi (__a
, __b
);
23170 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23171 vrshr_n_s8 (int8x8_t __a
, const int __b
)
23173 return (int8x8_t
) __builtin_aarch64_srshr_nv8qi (__a
, __b
);
23176 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23177 vrshr_n_s16 (int16x4_t __a
, const int __b
)
23179 return (int16x4_t
) __builtin_aarch64_srshr_nv4hi (__a
, __b
);
23182 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23183 vrshr_n_s32 (int32x2_t __a
, const int __b
)
23185 return (int32x2_t
) __builtin_aarch64_srshr_nv2si (__a
, __b
);
23188 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23189 vrshr_n_s64 (int64x1_t __a
, const int __b
)
23191 return (int64x1_t
) __builtin_aarch64_srshr_ndi (__a
, __b
);
23194 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23195 vrshr_n_u8 (uint8x8_t __a
, const int __b
)
23197 return (uint8x8_t
) __builtin_aarch64_urshr_nv8qi ((int8x8_t
) __a
, __b
);
23200 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23201 vrshr_n_u16 (uint16x4_t __a
, const int __b
)
23203 return (uint16x4_t
) __builtin_aarch64_urshr_nv4hi ((int16x4_t
) __a
, __b
);
23206 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23207 vrshr_n_u32 (uint32x2_t __a
, const int __b
)
23209 return (uint32x2_t
) __builtin_aarch64_urshr_nv2si ((int32x2_t
) __a
, __b
);
23212 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23213 vrshr_n_u64 (uint64x1_t __a
, const int __b
)
23215 return (uint64x1_t
) __builtin_aarch64_urshr_ndi ((int64x1_t
) __a
, __b
);
23218 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23219 vrshrq_n_s8 (int8x16_t __a
, const int __b
)
23221 return (int8x16_t
) __builtin_aarch64_srshr_nv16qi (__a
, __b
);
23224 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23225 vrshrq_n_s16 (int16x8_t __a
, const int __b
)
23227 return (int16x8_t
) __builtin_aarch64_srshr_nv8hi (__a
, __b
);
23230 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23231 vrshrq_n_s32 (int32x4_t __a
, const int __b
)
23233 return (int32x4_t
) __builtin_aarch64_srshr_nv4si (__a
, __b
);
23236 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23237 vrshrq_n_s64 (int64x2_t __a
, const int __b
)
23239 return (int64x2_t
) __builtin_aarch64_srshr_nv2di (__a
, __b
);
23242 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23243 vrshrq_n_u8 (uint8x16_t __a
, const int __b
)
23245 return (uint8x16_t
) __builtin_aarch64_urshr_nv16qi ((int8x16_t
) __a
, __b
);
23248 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23249 vrshrq_n_u16 (uint16x8_t __a
, const int __b
)
23251 return (uint16x8_t
) __builtin_aarch64_urshr_nv8hi ((int16x8_t
) __a
, __b
);
23254 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23255 vrshrq_n_u32 (uint32x4_t __a
, const int __b
)
23257 return (uint32x4_t
) __builtin_aarch64_urshr_nv4si ((int32x4_t
) __a
, __b
);
23260 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23261 vrshrq_n_u64 (uint64x2_t __a
, const int __b
)
23263 return (uint64x2_t
) __builtin_aarch64_urshr_nv2di ((int64x2_t
) __a
, __b
);
23266 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23267 vrshrd_n_s64 (int64x1_t __a
, const int __b
)
23269 return (int64x1_t
) __builtin_aarch64_srshr_ndi (__a
, __b
);
23272 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23273 vrshrd_n_u64 (uint64x1_t __a
, const int __b
)
23275 return (uint64x1_t
) __builtin_aarch64_urshr_ndi (__a
, __b
);
23280 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23281 vrsra_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
23283 return (int8x8_t
) __builtin_aarch64_srsra_nv8qi (__a
, __b
, __c
);
23286 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23287 vrsra_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
23289 return (int16x4_t
) __builtin_aarch64_srsra_nv4hi (__a
, __b
, __c
);
23292 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23293 vrsra_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
23295 return (int32x2_t
) __builtin_aarch64_srsra_nv2si (__a
, __b
, __c
);
23298 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23299 vrsra_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23301 return (int64x1_t
) __builtin_aarch64_srsra_ndi (__a
, __b
, __c
);
23304 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23305 vrsra_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
23307 return (uint8x8_t
) __builtin_aarch64_ursra_nv8qi ((int8x8_t
) __a
,
23308 (int8x8_t
) __b
, __c
);
23311 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23312 vrsra_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
23314 return (uint16x4_t
) __builtin_aarch64_ursra_nv4hi ((int16x4_t
) __a
,
23315 (int16x4_t
) __b
, __c
);
23318 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23319 vrsra_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
23321 return (uint32x2_t
) __builtin_aarch64_ursra_nv2si ((int32x2_t
) __a
,
23322 (int32x2_t
) __b
, __c
);
23325 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23326 vrsra_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23328 return (uint64x1_t
) __builtin_aarch64_ursra_ndi ((int64x1_t
) __a
,
23329 (int64x1_t
) __b
, __c
);
23332 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23333 vrsraq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
23335 return (int8x16_t
) __builtin_aarch64_srsra_nv16qi (__a
, __b
, __c
);
23338 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23339 vrsraq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
23341 return (int16x8_t
) __builtin_aarch64_srsra_nv8hi (__a
, __b
, __c
);
23344 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23345 vrsraq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
23347 return (int32x4_t
) __builtin_aarch64_srsra_nv4si (__a
, __b
, __c
);
23350 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23351 vrsraq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
23353 return (int64x2_t
) __builtin_aarch64_srsra_nv2di (__a
, __b
, __c
);
23356 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23357 vrsraq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
23359 return (uint8x16_t
) __builtin_aarch64_ursra_nv16qi ((int8x16_t
) __a
,
23360 (int8x16_t
) __b
, __c
);
23363 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23364 vrsraq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
23366 return (uint16x8_t
) __builtin_aarch64_ursra_nv8hi ((int16x8_t
) __a
,
23367 (int16x8_t
) __b
, __c
);
23370 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23371 vrsraq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
23373 return (uint32x4_t
) __builtin_aarch64_ursra_nv4si ((int32x4_t
) __a
,
23374 (int32x4_t
) __b
, __c
);
23377 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23378 vrsraq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
23380 return (uint64x2_t
) __builtin_aarch64_ursra_nv2di ((int64x2_t
) __a
,
23381 (int64x2_t
) __b
, __c
);
23384 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23385 vrsrad_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23387 return (int64x1_t
) __builtin_aarch64_srsra_ndi (__a
, __b
, __c
);
23390 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23391 vrsrad_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23393 return (uint64x1_t
) __builtin_aarch64_ursra_ndi (__a
, __b
, __c
);
23398 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23399 vshl_n_s8 (int8x8_t __a
, const int __b
)
23401 return (int8x8_t
) __builtin_aarch64_ashlv8qi (__a
, __b
);
23404 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23405 vshl_n_s16 (int16x4_t __a
, const int __b
)
23407 return (int16x4_t
) __builtin_aarch64_ashlv4hi (__a
, __b
);
23410 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23411 vshl_n_s32 (int32x2_t __a
, const int __b
)
23413 return (int32x2_t
) __builtin_aarch64_ashlv2si (__a
, __b
);
23416 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23417 vshl_n_s64 (int64x1_t __a
, const int __b
)
23419 return (int64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23422 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23423 vshl_n_u8 (uint8x8_t __a
, const int __b
)
23425 return (uint8x8_t
) __builtin_aarch64_ashlv8qi ((int8x8_t
) __a
, __b
);
23428 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23429 vshl_n_u16 (uint16x4_t __a
, const int __b
)
23431 return (uint16x4_t
) __builtin_aarch64_ashlv4hi ((int16x4_t
) __a
, __b
);
23434 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23435 vshl_n_u32 (uint32x2_t __a
, const int __b
)
23437 return (uint32x2_t
) __builtin_aarch64_ashlv2si ((int32x2_t
) __a
, __b
);
23440 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23441 vshl_n_u64 (uint64x1_t __a
, const int __b
)
23443 return (uint64x1_t
) __builtin_aarch64_ashldi ((int64x1_t
) __a
, __b
);
23446 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23447 vshlq_n_s8 (int8x16_t __a
, const int __b
)
23449 return (int8x16_t
) __builtin_aarch64_ashlv16qi (__a
, __b
);
23452 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23453 vshlq_n_s16 (int16x8_t __a
, const int __b
)
23455 return (int16x8_t
) __builtin_aarch64_ashlv8hi (__a
, __b
);
23458 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23459 vshlq_n_s32 (int32x4_t __a
, const int __b
)
23461 return (int32x4_t
) __builtin_aarch64_ashlv4si (__a
, __b
);
23464 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23465 vshlq_n_s64 (int64x2_t __a
, const int __b
)
23467 return (int64x2_t
) __builtin_aarch64_ashlv2di (__a
, __b
);
23470 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23471 vshlq_n_u8 (uint8x16_t __a
, const int __b
)
23473 return (uint8x16_t
) __builtin_aarch64_ashlv16qi ((int8x16_t
) __a
, __b
);
23476 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23477 vshlq_n_u16 (uint16x8_t __a
, const int __b
)
23479 return (uint16x8_t
) __builtin_aarch64_ashlv8hi ((int16x8_t
) __a
, __b
);
23482 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23483 vshlq_n_u32 (uint32x4_t __a
, const int __b
)
23485 return (uint32x4_t
) __builtin_aarch64_ashlv4si ((int32x4_t
) __a
, __b
);
23488 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23489 vshlq_n_u64 (uint64x2_t __a
, const int __b
)
23491 return (uint64x2_t
) __builtin_aarch64_ashlv2di ((int64x2_t
) __a
, __b
);
23494 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23495 vshld_n_s64 (int64x1_t __a
, const int __b
)
23497 return (int64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23500 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23501 vshld_n_u64 (uint64x1_t __a
, const int __b
)
23503 return (uint64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23506 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23507 vshl_s8 (int8x8_t __a
, int8x8_t __b
)
23509 return (int8x8_t
) __builtin_aarch64_sshlv8qi (__a
, __b
);
23512 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23513 vshl_s16 (int16x4_t __a
, int16x4_t __b
)
23515 return (int16x4_t
) __builtin_aarch64_sshlv4hi (__a
, __b
);
23518 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23519 vshl_s32 (int32x2_t __a
, int32x2_t __b
)
23521 return (int32x2_t
) __builtin_aarch64_sshlv2si (__a
, __b
);
23524 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23525 vshl_s64 (int64x1_t __a
, int64x1_t __b
)
23527 return (int64x1_t
) __builtin_aarch64_sshldi (__a
, __b
);
23530 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23531 vshl_u8 (uint8x8_t __a
, int8x8_t __b
)
23533 return (uint8x8_t
) __builtin_aarch64_ushlv8qi ((int8x8_t
) __a
, __b
);
23536 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23537 vshl_u16 (uint16x4_t __a
, int16x4_t __b
)
23539 return (uint16x4_t
) __builtin_aarch64_ushlv4hi ((int16x4_t
) __a
, __b
);
23542 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23543 vshl_u32 (uint32x2_t __a
, int32x2_t __b
)
23545 return (uint32x2_t
) __builtin_aarch64_ushlv2si ((int32x2_t
) __a
, __b
);
23548 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23549 vshl_u64 (uint64x1_t __a
, int64x1_t __b
)
23551 return (uint64x1_t
) __builtin_aarch64_ushldi ((int64x1_t
) __a
, __b
);
23554 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23555 vshlq_s8 (int8x16_t __a
, int8x16_t __b
)
23557 return (int8x16_t
) __builtin_aarch64_sshlv16qi (__a
, __b
);
23560 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23561 vshlq_s16 (int16x8_t __a
, int16x8_t __b
)
23563 return (int16x8_t
) __builtin_aarch64_sshlv8hi (__a
, __b
);
23566 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23567 vshlq_s32 (int32x4_t __a
, int32x4_t __b
)
23569 return (int32x4_t
) __builtin_aarch64_sshlv4si (__a
, __b
);
23572 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23573 vshlq_s64 (int64x2_t __a
, int64x2_t __b
)
23575 return (int64x2_t
) __builtin_aarch64_sshlv2di (__a
, __b
);
23578 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23579 vshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
23581 return (uint8x16_t
) __builtin_aarch64_ushlv16qi ((int8x16_t
) __a
, __b
);
23584 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23585 vshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
23587 return (uint16x8_t
) __builtin_aarch64_ushlv8hi ((int16x8_t
) __a
, __b
);
23590 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23591 vshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
23593 return (uint32x4_t
) __builtin_aarch64_ushlv4si ((int32x4_t
) __a
, __b
);
23596 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23597 vshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
23599 return (uint64x2_t
) __builtin_aarch64_ushlv2di ((int64x2_t
) __a
, __b
);
23602 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23603 vshld_s64 (int64x1_t __a
, int64x1_t __b
)
23605 return (int64x1_t
) __builtin_aarch64_sshldi (__a
, __b
);
23608 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23609 vshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
23611 return (uint64x1_t
) __builtin_aarch64_ushldi (__a
, __b
);
23614 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23615 vshll_high_n_s8 (int8x16_t __a
, const int __b
)
23617 return __builtin_aarch64_sshll2_nv16qi (__a
, __b
);
23620 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23621 vshll_high_n_s16 (int16x8_t __a
, const int __b
)
23623 return __builtin_aarch64_sshll2_nv8hi (__a
, __b
);
23626 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23627 vshll_high_n_s32 (int32x4_t __a
, const int __b
)
23629 return __builtin_aarch64_sshll2_nv4si (__a
, __b
);
23632 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23633 vshll_high_n_u8 (uint8x16_t __a
, const int __b
)
23635 return (uint16x8_t
) __builtin_aarch64_ushll2_nv16qi ((int8x16_t
) __a
, __b
);
23638 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23639 vshll_high_n_u16 (uint16x8_t __a
, const int __b
)
23641 return (uint32x4_t
) __builtin_aarch64_ushll2_nv8hi ((int16x8_t
) __a
, __b
);
23644 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23645 vshll_high_n_u32 (uint32x4_t __a
, const int __b
)
23647 return (uint64x2_t
) __builtin_aarch64_ushll2_nv4si ((int32x4_t
) __a
, __b
);
23650 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23651 vshll_n_s8 (int8x8_t __a
, const int __b
)
23653 return __builtin_aarch64_sshll_nv8qi (__a
, __b
);
23656 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23657 vshll_n_s16 (int16x4_t __a
, const int __b
)
23659 return __builtin_aarch64_sshll_nv4hi (__a
, __b
);
23662 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23663 vshll_n_s32 (int32x2_t __a
, const int __b
)
23665 return __builtin_aarch64_sshll_nv2si (__a
, __b
);
23668 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23669 vshll_n_u8 (uint8x8_t __a
, const int __b
)
23671 return (uint16x8_t
) __builtin_aarch64_ushll_nv8qi ((int8x8_t
) __a
, __b
);
23674 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23675 vshll_n_u16 (uint16x4_t __a
, const int __b
)
23677 return (uint32x4_t
) __builtin_aarch64_ushll_nv4hi ((int16x4_t
) __a
, __b
);
23680 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23681 vshll_n_u32 (uint32x2_t __a
, const int __b
)
23683 return (uint64x2_t
) __builtin_aarch64_ushll_nv2si ((int32x2_t
) __a
, __b
);
23688 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23689 vshr_n_s8 (int8x8_t __a
, const int __b
)
23691 return (int8x8_t
) __builtin_aarch64_ashrv8qi (__a
, __b
);
23694 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23695 vshr_n_s16 (int16x4_t __a
, const int __b
)
23697 return (int16x4_t
) __builtin_aarch64_ashrv4hi (__a
, __b
);
23700 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23701 vshr_n_s32 (int32x2_t __a
, const int __b
)
23703 return (int32x2_t
) __builtin_aarch64_ashrv2si (__a
, __b
);
23706 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23707 vshr_n_s64 (int64x1_t __a
, const int __b
)
23709 return (int64x1_t
) __builtin_aarch64_ashrdi (__a
, __b
);
23712 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23713 vshr_n_u8 (uint8x8_t __a
, const int __b
)
23715 return (uint8x8_t
) __builtin_aarch64_lshrv8qi ((int8x8_t
) __a
, __b
);
23718 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23719 vshr_n_u16 (uint16x4_t __a
, const int __b
)
23721 return (uint16x4_t
) __builtin_aarch64_lshrv4hi ((int16x4_t
) __a
, __b
);
23724 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23725 vshr_n_u32 (uint32x2_t __a
, const int __b
)
23727 return (uint32x2_t
) __builtin_aarch64_lshrv2si ((int32x2_t
) __a
, __b
);
23730 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23731 vshr_n_u64 (uint64x1_t __a
, const int __b
)
23733 return (uint64x1_t
) __builtin_aarch64_lshrdi ((int64x1_t
) __a
, __b
);
23736 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23737 vshrq_n_s8 (int8x16_t __a
, const int __b
)
23739 return (int8x16_t
) __builtin_aarch64_ashrv16qi (__a
, __b
);
23742 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23743 vshrq_n_s16 (int16x8_t __a
, const int __b
)
23745 return (int16x8_t
) __builtin_aarch64_ashrv8hi (__a
, __b
);
23748 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23749 vshrq_n_s32 (int32x4_t __a
, const int __b
)
23751 return (int32x4_t
) __builtin_aarch64_ashrv4si (__a
, __b
);
23754 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23755 vshrq_n_s64 (int64x2_t __a
, const int __b
)
23757 return (int64x2_t
) __builtin_aarch64_ashrv2di (__a
, __b
);
23760 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23761 vshrq_n_u8 (uint8x16_t __a
, const int __b
)
23763 return (uint8x16_t
) __builtin_aarch64_lshrv16qi ((int8x16_t
) __a
, __b
);
23766 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23767 vshrq_n_u16 (uint16x8_t __a
, const int __b
)
23769 return (uint16x8_t
) __builtin_aarch64_lshrv8hi ((int16x8_t
) __a
, __b
);
23772 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23773 vshrq_n_u32 (uint32x4_t __a
, const int __b
)
23775 return (uint32x4_t
) __builtin_aarch64_lshrv4si ((int32x4_t
) __a
, __b
);
23778 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23779 vshrq_n_u64 (uint64x2_t __a
, const int __b
)
23781 return (uint64x2_t
) __builtin_aarch64_lshrv2di ((int64x2_t
) __a
, __b
);
23784 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23785 vshrd_n_s64 (int64x1_t __a
, const int __b
)
23787 return (int64x1_t
) __builtin_aarch64_ashrdi (__a
, __b
);
23790 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23791 vshrd_n_u64 (uint64x1_t __a
, const int __b
)
23793 return (uint64x1_t
) __builtin_aarch64_lshrdi (__a
, __b
);
23798 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23799 vsli_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
23801 return (int8x8_t
) __builtin_aarch64_ssli_nv8qi (__a
, __b
, __c
);
23804 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23805 vsli_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
23807 return (int16x4_t
) __builtin_aarch64_ssli_nv4hi (__a
, __b
, __c
);
23810 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23811 vsli_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
23813 return (int32x2_t
) __builtin_aarch64_ssli_nv2si (__a
, __b
, __c
);
23816 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23817 vsli_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23819 return (int64x1_t
) __builtin_aarch64_ssli_ndi (__a
, __b
, __c
);
23822 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23823 vsli_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
23825 return (uint8x8_t
) __builtin_aarch64_usli_nv8qi ((int8x8_t
) __a
,
23826 (int8x8_t
) __b
, __c
);
23829 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23830 vsli_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
23832 return (uint16x4_t
) __builtin_aarch64_usli_nv4hi ((int16x4_t
) __a
,
23833 (int16x4_t
) __b
, __c
);
23836 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23837 vsli_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
23839 return (uint32x2_t
) __builtin_aarch64_usli_nv2si ((int32x2_t
) __a
,
23840 (int32x2_t
) __b
, __c
);
23843 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23844 vsli_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23846 return (uint64x1_t
) __builtin_aarch64_usli_ndi ((int64x1_t
) __a
,
23847 (int64x1_t
) __b
, __c
);
23850 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23851 vsliq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
23853 return (int8x16_t
) __builtin_aarch64_ssli_nv16qi (__a
, __b
, __c
);
23856 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23857 vsliq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
23859 return (int16x8_t
) __builtin_aarch64_ssli_nv8hi (__a
, __b
, __c
);
23862 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23863 vsliq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
23865 return (int32x4_t
) __builtin_aarch64_ssli_nv4si (__a
, __b
, __c
);
23868 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23869 vsliq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
23871 return (int64x2_t
) __builtin_aarch64_ssli_nv2di (__a
, __b
, __c
);
23874 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23875 vsliq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
23877 return (uint8x16_t
) __builtin_aarch64_usli_nv16qi ((int8x16_t
) __a
,
23878 (int8x16_t
) __b
, __c
);
23881 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23882 vsliq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
23884 return (uint16x8_t
) __builtin_aarch64_usli_nv8hi ((int16x8_t
) __a
,
23885 (int16x8_t
) __b
, __c
);
23888 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23889 vsliq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
23891 return (uint32x4_t
) __builtin_aarch64_usli_nv4si ((int32x4_t
) __a
,
23892 (int32x4_t
) __b
, __c
);
23895 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23896 vsliq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
23898 return (uint64x2_t
) __builtin_aarch64_usli_nv2di ((int64x2_t
) __a
,
23899 (int64x2_t
) __b
, __c
);
23902 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23903 vslid_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23905 return (int64x1_t
) __builtin_aarch64_ssli_ndi (__a
, __b
, __c
);
23908 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23909 vslid_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23911 return (uint64x1_t
) __builtin_aarch64_usli_ndi (__a
, __b
, __c
);
23916 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23917 vsqadd_u8 (uint8x8_t __a
, int8x8_t __b
)
23919 return (uint8x8_t
) __builtin_aarch64_usqaddv8qi ((int8x8_t
) __a
,
23923 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23924 vsqadd_u16 (uint16x4_t __a
, int16x4_t __b
)
23926 return (uint16x4_t
) __builtin_aarch64_usqaddv4hi ((int16x4_t
) __a
,
23930 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23931 vsqadd_u32 (uint32x2_t __a
, int32x2_t __b
)
23933 return (uint32x2_t
) __builtin_aarch64_usqaddv2si ((int32x2_t
) __a
,
23937 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23938 vsqadd_u64 (uint64x1_t __a
, int64x1_t __b
)
23940 return (uint64x1_t
) __builtin_aarch64_usqadddi ((int64x1_t
) __a
, __b
);
23943 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23944 vsqaddq_u8 (uint8x16_t __a
, int8x16_t __b
)
23946 return (uint8x16_t
) __builtin_aarch64_usqaddv16qi ((int8x16_t
) __a
,
23950 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23951 vsqaddq_u16 (uint16x8_t __a
, int16x8_t __b
)
23953 return (uint16x8_t
) __builtin_aarch64_usqaddv8hi ((int16x8_t
) __a
,
23957 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23958 vsqaddq_u32 (uint32x4_t __a
, int32x4_t __b
)
23960 return (uint32x4_t
) __builtin_aarch64_usqaddv4si ((int32x4_t
) __a
,
23964 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23965 vsqaddq_u64 (uint64x2_t __a
, int64x2_t __b
)
23967 return (uint64x2_t
) __builtin_aarch64_usqaddv2di ((int64x2_t
) __a
,
23971 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
23972 vsqaddb_u8 (uint8x1_t __a
, int8x1_t __b
)
23974 return (uint8x1_t
) __builtin_aarch64_usqaddqi ((int8x1_t
) __a
, __b
);
23977 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
23978 vsqaddh_u16 (uint16x1_t __a
, int16x1_t __b
)
23980 return (uint16x1_t
) __builtin_aarch64_usqaddhi ((int16x1_t
) __a
, __b
);
23983 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
23984 vsqadds_u32 (uint32x1_t __a
, int32x1_t __b
)
23986 return (uint32x1_t
) __builtin_aarch64_usqaddsi ((int32x1_t
) __a
, __b
);
23989 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23990 vsqaddd_u64 (uint64x1_t __a
, int64x1_t __b
)
23992 return (uint64x1_t
) __builtin_aarch64_usqadddi ((int64x1_t
) __a
, __b
);
23996 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23997 vsqrt_f32 (float32x2_t a
)
23999 return __builtin_aarch64_sqrtv2sf (a
);
24002 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
24003 vsqrtq_f32 (float32x4_t a
)
24005 return __builtin_aarch64_sqrtv4sf (a
);
24008 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
24009 vsqrtq_f64 (float64x2_t a
)
24011 return __builtin_aarch64_sqrtv2df (a
);
24016 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
24017 vsra_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
24019 return (int8x8_t
) __builtin_aarch64_ssra_nv8qi (__a
, __b
, __c
);
24022 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
24023 vsra_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
24025 return (int16x4_t
) __builtin_aarch64_ssra_nv4hi (__a
, __b
, __c
);
24028 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
24029 vsra_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
24031 return (int32x2_t
) __builtin_aarch64_ssra_nv2si (__a
, __b
, __c
);
24034 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24035 vsra_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24037 return (int64x1_t
) __builtin_aarch64_ssra_ndi (__a
, __b
, __c
);
24040 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
24041 vsra_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
24043 return (uint8x8_t
) __builtin_aarch64_usra_nv8qi ((int8x8_t
) __a
,
24044 (int8x8_t
) __b
, __c
);
24047 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
24048 vsra_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
24050 return (uint16x4_t
) __builtin_aarch64_usra_nv4hi ((int16x4_t
) __a
,
24051 (int16x4_t
) __b
, __c
);
24054 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
24055 vsra_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
24057 return (uint32x2_t
) __builtin_aarch64_usra_nv2si ((int32x2_t
) __a
,
24058 (int32x2_t
) __b
, __c
);
24061 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24062 vsra_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24064 return (uint64x1_t
) __builtin_aarch64_usra_ndi ((int64x1_t
) __a
,
24065 (int64x1_t
) __b
, __c
);
24068 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
24069 vsraq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
24071 return (int8x16_t
) __builtin_aarch64_ssra_nv16qi (__a
, __b
, __c
);
24074 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
24075 vsraq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
24077 return (int16x8_t
) __builtin_aarch64_ssra_nv8hi (__a
, __b
, __c
);
24080 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
24081 vsraq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
24083 return (int32x4_t
) __builtin_aarch64_ssra_nv4si (__a
, __b
, __c
);
24086 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
24087 vsraq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
24089 return (int64x2_t
) __builtin_aarch64_ssra_nv2di (__a
, __b
, __c
);
24092 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
24093 vsraq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
24095 return (uint8x16_t
) __builtin_aarch64_usra_nv16qi ((int8x16_t
) __a
,
24096 (int8x16_t
) __b
, __c
);
24099 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
24100 vsraq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
24102 return (uint16x8_t
) __builtin_aarch64_usra_nv8hi ((int16x8_t
) __a
,
24103 (int16x8_t
) __b
, __c
);
24106 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
24107 vsraq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
24109 return (uint32x4_t
) __builtin_aarch64_usra_nv4si ((int32x4_t
) __a
,
24110 (int32x4_t
) __b
, __c
);
24113 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
24114 vsraq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
24116 return (uint64x2_t
) __builtin_aarch64_usra_nv2di ((int64x2_t
) __a
,
24117 (int64x2_t
) __b
, __c
);
24120 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24121 vsrad_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24123 return (int64x1_t
) __builtin_aarch64_ssra_ndi (__a
, __b
, __c
);
24126 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24127 vsrad_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24129 return (uint64x1_t
) __builtin_aarch64_usra_ndi (__a
, __b
, __c
);
24134 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
24135 vsri_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
24137 return (int8x8_t
) __builtin_aarch64_ssri_nv8qi (__a
, __b
, __c
);
24140 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
24141 vsri_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
24143 return (int16x4_t
) __builtin_aarch64_ssri_nv4hi (__a
, __b
, __c
);
24146 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
24147 vsri_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
24149 return (int32x2_t
) __builtin_aarch64_ssri_nv2si (__a
, __b
, __c
);
24152 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24153 vsri_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24155 return (int64x1_t
) __builtin_aarch64_ssri_ndi (__a
, __b
, __c
);
24158 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
24159 vsri_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
24161 return (uint8x8_t
) __builtin_aarch64_usri_nv8qi ((int8x8_t
) __a
,
24162 (int8x8_t
) __b
, __c
);
24165 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
24166 vsri_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
24168 return (uint16x4_t
) __builtin_aarch64_usri_nv4hi ((int16x4_t
) __a
,
24169 (int16x4_t
) __b
, __c
);
24172 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
24173 vsri_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
24175 return (uint32x2_t
) __builtin_aarch64_usri_nv2si ((int32x2_t
) __a
,
24176 (int32x2_t
) __b
, __c
);
24179 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24180 vsri_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24182 return (uint64x1_t
) __builtin_aarch64_usri_ndi ((int64x1_t
) __a
,
24183 (int64x1_t
) __b
, __c
);
24186 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
24187 vsriq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
24189 return (int8x16_t
) __builtin_aarch64_ssri_nv16qi (__a
, __b
, __c
);
24192 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
24193 vsriq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
24195 return (int16x8_t
) __builtin_aarch64_ssri_nv8hi (__a
, __b
, __c
);
24198 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
24199 vsriq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
24201 return (int32x4_t
) __builtin_aarch64_ssri_nv4si (__a
, __b
, __c
);
24204 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
24205 vsriq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
24207 return (int64x2_t
) __builtin_aarch64_ssri_nv2di (__a
, __b
, __c
);
24210 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
24211 vsriq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
24213 return (uint8x16_t
) __builtin_aarch64_usri_nv16qi ((int8x16_t
) __a
,
24214 (int8x16_t
) __b
, __c
);
24217 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
24218 vsriq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
24220 return (uint16x8_t
) __builtin_aarch64_usri_nv8hi ((int16x8_t
) __a
,
24221 (int16x8_t
) __b
, __c
);
24224 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
24225 vsriq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
24227 return (uint32x4_t
) __builtin_aarch64_usri_nv4si ((int32x4_t
) __a
,
24228 (int32x4_t
) __b
, __c
);
24231 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
24232 vsriq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
24234 return (uint64x2_t
) __builtin_aarch64_usri_nv2di ((int64x2_t
) __a
,
24235 (int64x2_t
) __b
, __c
);
24238 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24239 vsrid_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24241 return (int64x1_t
) __builtin_aarch64_ssri_ndi (__a
, __b
, __c
);
24244 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24245 vsrid_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24247 return (uint64x1_t
) __builtin_aarch64_usri_ndi (__a
, __b
, __c
);
24252 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24253 vst1_f32 (float32_t
*a
, float32x2_t b
)
24255 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf
*) a
, b
);
24258 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24259 vst1_f64 (float64_t
*a
, float64x1_t b
)
24264 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24265 vst1_p8 (poly8_t
*a
, poly8x8_t b
)
24267 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi
*) a
,
24271 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24272 vst1_p16 (poly16_t
*a
, poly16x4_t b
)
24274 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi
*) a
,
24278 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24279 vst1_s8 (int8_t *a
, int8x8_t b
)
24281 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi
*) a
, b
);
24284 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24285 vst1_s16 (int16_t *a
, int16x4_t b
)
24287 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi
*) a
, b
);
24290 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24291 vst1_s32 (int32_t *a
, int32x2_t b
)
24293 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si
*) a
, b
);
24296 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24297 vst1_s64 (int64_t *a
, int64x1_t b
)
24302 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24303 vst1_u8 (uint8_t *a
, uint8x8_t b
)
24305 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi
*) a
,
24309 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24310 vst1_u16 (uint16_t *a
, uint16x4_t b
)
24312 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi
*) a
,
24316 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24317 vst1_u32 (uint32_t *a
, uint32x2_t b
)
24319 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si
*) a
,
24323 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24324 vst1_u64 (uint64_t *a
, uint64x1_t b
)
24329 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24330 vst1q_f32 (float32_t
*a
, float32x4_t b
)
24332 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf
*) a
, b
);
24335 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24336 vst1q_f64 (float64_t
*a
, float64x2_t b
)
24338 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df
*) a
, b
);
24343 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24344 vst1q_p8 (poly8_t
*a
, poly8x16_t b
)
24346 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi
*) a
,
24350 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24351 vst1q_p16 (poly16_t
*a
, poly16x8_t b
)
24353 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi
*) a
,
24357 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24358 vst1q_s8 (int8_t *a
, int8x16_t b
)
24360 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi
*) a
, b
);
24363 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24364 vst1q_s16 (int16_t *a
, int16x8_t b
)
24366 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi
*) a
, b
);
24369 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24370 vst1q_s32 (int32_t *a
, int32x4_t b
)
24372 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si
*) a
, b
);
24375 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24376 vst1q_s64 (int64_t *a
, int64x2_t b
)
24378 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di
*) a
, b
);
24381 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24382 vst1q_u8 (uint8_t *a
, uint8x16_t b
)
24384 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi
*) a
,
24388 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24389 vst1q_u16 (uint16_t *a
, uint16x8_t b
)
24391 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi
*) a
,
24395 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24396 vst1q_u32 (uint32_t *a
, uint32x4_t b
)
24398 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si
*) a
,
24402 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24403 vst1q_u64 (uint64_t *a
, uint64x2_t b
)
24405 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di
*) a
,
24411 __extension__
static __inline
void
24412 vst2_s64 (int64_t * __a
, int64x1x2_t val
)
24414 __builtin_aarch64_simd_oi __o
;
24416 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24417 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24418 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24419 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24420 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24423 __extension__
static __inline
void
24424 vst2_u64 (uint64_t * __a
, uint64x1x2_t val
)
24426 __builtin_aarch64_simd_oi __o
;
24428 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24429 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24430 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24431 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24432 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24435 __extension__
static __inline
void
24436 vst2_f64 (float64_t
* __a
, float64x1x2_t val
)
24438 __builtin_aarch64_simd_oi __o
;
24439 float64x2x2_t temp
;
24440 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24441 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24442 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24443 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24444 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24447 __extension__
static __inline
void
24448 vst2_s8 (int8_t * __a
, int8x8x2_t val
)
24450 __builtin_aarch64_simd_oi __o
;
24452 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
24453 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
24454 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24455 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24456 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24459 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24460 vst2_p8 (poly8_t
* __a
, poly8x8x2_t val
)
24462 __builtin_aarch64_simd_oi __o
;
24464 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
24465 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
24466 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24467 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24468 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24471 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24472 vst2_s16 (int16_t * __a
, int16x4x2_t val
)
24474 __builtin_aarch64_simd_oi __o
;
24476 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
24477 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
24478 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24479 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24480 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24483 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24484 vst2_p16 (poly16_t
* __a
, poly16x4x2_t val
)
24486 __builtin_aarch64_simd_oi __o
;
24488 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
24489 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
24490 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24491 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24492 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24495 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24496 vst2_s32 (int32_t * __a
, int32x2x2_t val
)
24498 __builtin_aarch64_simd_oi __o
;
24500 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
24501 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
24502 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24503 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24504 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24507 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24508 vst2_u8 (uint8_t * __a
, uint8x8x2_t val
)
24510 __builtin_aarch64_simd_oi __o
;
24512 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
24513 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
24514 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24515 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24516 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24519 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24520 vst2_u16 (uint16_t * __a
, uint16x4x2_t val
)
24522 __builtin_aarch64_simd_oi __o
;
24524 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
24525 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
24526 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24527 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24528 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24531 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24532 vst2_u32 (uint32_t * __a
, uint32x2x2_t val
)
24534 __builtin_aarch64_simd_oi __o
;
24536 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
24537 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
24538 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24539 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24540 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24543 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24544 vst2_f32 (float32_t
* __a
, float32x2x2_t val
)
24546 __builtin_aarch64_simd_oi __o
;
24547 float32x4x2_t temp
;
24548 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
24549 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
24550 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
24551 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
24552 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24555 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24556 vst2q_s8 (int8_t * __a
, int8x16x2_t val
)
24558 __builtin_aarch64_simd_oi __o
;
24559 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24560 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24561 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24564 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24565 vst2q_p8 (poly8_t
* __a
, poly8x16x2_t val
)
24567 __builtin_aarch64_simd_oi __o
;
24568 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24569 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24570 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24573 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24574 vst2q_s16 (int16_t * __a
, int16x8x2_t val
)
24576 __builtin_aarch64_simd_oi __o
;
24577 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24578 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24579 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24582 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24583 vst2q_p16 (poly16_t
* __a
, poly16x8x2_t val
)
24585 __builtin_aarch64_simd_oi __o
;
24586 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24587 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24588 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24591 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24592 vst2q_s32 (int32_t * __a
, int32x4x2_t val
)
24594 __builtin_aarch64_simd_oi __o
;
24595 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24596 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24597 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24600 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24601 vst2q_s64 (int64_t * __a
, int64x2x2_t val
)
24603 __builtin_aarch64_simd_oi __o
;
24604 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24605 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24606 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24609 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24610 vst2q_u8 (uint8_t * __a
, uint8x16x2_t val
)
24612 __builtin_aarch64_simd_oi __o
;
24613 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24614 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24615 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24618 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24619 vst2q_u16 (uint16_t * __a
, uint16x8x2_t val
)
24621 __builtin_aarch64_simd_oi __o
;
24622 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24623 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24624 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24627 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24628 vst2q_u32 (uint32_t * __a
, uint32x4x2_t val
)
24630 __builtin_aarch64_simd_oi __o
;
24631 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24632 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24633 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24636 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24637 vst2q_u64 (uint64_t * __a
, uint64x2x2_t val
)
24639 __builtin_aarch64_simd_oi __o
;
24640 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24641 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24642 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24645 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24646 vst2q_f32 (float32_t
* __a
, float32x4x2_t val
)
24648 __builtin_aarch64_simd_oi __o
;
24649 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
24650 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
24651 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24654 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24655 vst2q_f64 (float64_t
* __a
, float64x2x2_t val
)
24657 __builtin_aarch64_simd_oi __o
;
24658 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) val
.val
[0], 0);
24659 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) val
.val
[1], 1);
24660 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24663 __extension__
static __inline
void
24664 vst3_s64 (int64_t * __a
, int64x1x3_t val
)
24666 __builtin_aarch64_simd_ci __o
;
24668 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24669 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24670 temp
.val
[2] = vcombine_s64 (val
.val
[2], vcreate_s64 (INT64_C (0)));
24671 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24672 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24673 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24674 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24677 __extension__
static __inline
void
24678 vst3_u64 (uint64_t * __a
, uint64x1x3_t val
)
24680 __builtin_aarch64_simd_ci __o
;
24682 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24683 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24684 temp
.val
[2] = vcombine_u64 (val
.val
[2], vcreate_u64 (UINT64_C (0)));
24685 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24686 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24687 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24688 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24691 __extension__
static __inline
void
24692 vst3_f64 (float64_t
* __a
, float64x1x3_t val
)
24694 __builtin_aarch64_simd_ci __o
;
24695 float64x2x3_t temp
;
24696 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24697 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24698 temp
.val
[2] = vcombine_f64 (val
.val
[2], vcreate_f64 (UINT64_C (0)));
24699 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24700 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24701 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[2], 2);
24702 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24705 __extension__
static __inline
void
24706 vst3_s8 (int8_t * __a
, int8x8x3_t val
)
24708 __builtin_aarch64_simd_ci __o
;
24710 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
24711 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
24712 temp
.val
[2] = vcombine_s8 (val
.val
[2], vcreate_s8 (INT64_C (0)));
24713 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24714 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24715 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24716 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24719 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24720 vst3_p8 (poly8_t
* __a
, poly8x8x3_t val
)
24722 __builtin_aarch64_simd_ci __o
;
24724 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
24725 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
24726 temp
.val
[2] = vcombine_p8 (val
.val
[2], vcreate_p8 (UINT64_C (0)));
24727 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24728 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24729 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24730 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24733 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24734 vst3_s16 (int16_t * __a
, int16x4x3_t val
)
24736 __builtin_aarch64_simd_ci __o
;
24738 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
24739 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
24740 temp
.val
[2] = vcombine_s16 (val
.val
[2], vcreate_s16 (INT64_C (0)));
24741 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24742 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24743 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24744 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24747 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24748 vst3_p16 (poly16_t
* __a
, poly16x4x3_t val
)
24750 __builtin_aarch64_simd_ci __o
;
24752 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
24753 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
24754 temp
.val
[2] = vcombine_p16 (val
.val
[2], vcreate_p16 (UINT64_C (0)));
24755 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24756 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24757 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24758 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24761 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24762 vst3_s32 (int32_t * __a
, int32x2x3_t val
)
24764 __builtin_aarch64_simd_ci __o
;
24766 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
24767 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
24768 temp
.val
[2] = vcombine_s32 (val
.val
[2], vcreate_s32 (INT64_C (0)));
24769 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24770 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24771 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24772 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24775 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24776 vst3_u8 (uint8_t * __a
, uint8x8x3_t val
)
24778 __builtin_aarch64_simd_ci __o
;
24780 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
24781 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
24782 temp
.val
[2] = vcombine_u8 (val
.val
[2], vcreate_u8 (UINT64_C (0)));
24783 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24784 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24785 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24786 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24789 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24790 vst3_u16 (uint16_t * __a
, uint16x4x3_t val
)
24792 __builtin_aarch64_simd_ci __o
;
24794 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
24795 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
24796 temp
.val
[2] = vcombine_u16 (val
.val
[2], vcreate_u16 (UINT64_C (0)));
24797 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24798 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24799 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24800 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24803 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24804 vst3_u32 (uint32_t * __a
, uint32x2x3_t val
)
24806 __builtin_aarch64_simd_ci __o
;
24808 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
24809 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
24810 temp
.val
[2] = vcombine_u32 (val
.val
[2], vcreate_u32 (UINT64_C (0)));
24811 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24812 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24813 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24814 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24817 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24818 vst3_f32 (float32_t
* __a
, float32x2x3_t val
)
24820 __builtin_aarch64_simd_ci __o
;
24821 float32x4x3_t temp
;
24822 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
24823 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
24824 temp
.val
[2] = vcombine_f32 (val
.val
[2], vcreate_f32 (UINT64_C (0)));
24825 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
24826 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
24827 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[2], 2);
24828 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24831 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24832 vst3q_s8 (int8_t * __a
, int8x16x3_t val
)
24834 __builtin_aarch64_simd_ci __o
;
24835 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24836 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24837 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24838 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24841 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24842 vst3q_p8 (poly8_t
* __a
, poly8x16x3_t val
)
24844 __builtin_aarch64_simd_ci __o
;
24845 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24846 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24847 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24848 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24851 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24852 vst3q_s16 (int16_t * __a
, int16x8x3_t val
)
24854 __builtin_aarch64_simd_ci __o
;
24855 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24856 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24857 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24858 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24861 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24862 vst3q_p16 (poly16_t
* __a
, poly16x8x3_t val
)
24864 __builtin_aarch64_simd_ci __o
;
24865 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24866 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24867 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24868 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24871 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24872 vst3q_s32 (int32_t * __a
, int32x4x3_t val
)
24874 __builtin_aarch64_simd_ci __o
;
24875 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24876 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24877 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[2], 2);
24878 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24881 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24882 vst3q_s64 (int64_t * __a
, int64x2x3_t val
)
24884 __builtin_aarch64_simd_ci __o
;
24885 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24886 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24887 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[2], 2);
24888 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24891 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24892 vst3q_u8 (uint8_t * __a
, uint8x16x3_t val
)
24894 __builtin_aarch64_simd_ci __o
;
24895 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24896 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24897 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24898 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24901 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24902 vst3q_u16 (uint16_t * __a
, uint16x8x3_t val
)
24904 __builtin_aarch64_simd_ci __o
;
24905 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24906 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24907 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24908 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24911 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24912 vst3q_u32 (uint32_t * __a
, uint32x4x3_t val
)
24914 __builtin_aarch64_simd_ci __o
;
24915 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24916 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24917 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[2], 2);
24918 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24921 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24922 vst3q_u64 (uint64_t * __a
, uint64x2x3_t val
)
24924 __builtin_aarch64_simd_ci __o
;
24925 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24926 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24927 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[2], 2);
24928 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24931 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24932 vst3q_f32 (float32_t
* __a
, float32x4x3_t val
)
24934 __builtin_aarch64_simd_ci __o
;
24935 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
24936 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
24937 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[2], 2);
24938 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24941 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24942 vst3q_f64 (float64_t
* __a
, float64x2x3_t val
)
24944 __builtin_aarch64_simd_ci __o
;
24945 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[0], 0);
24946 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[1], 1);
24947 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[2], 2);
24948 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24951 __extension__
static __inline
void
24952 vst4_s64 (int64_t * __a
, int64x1x4_t val
)
24954 __builtin_aarch64_simd_xi __o
;
24956 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24957 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24958 temp
.val
[2] = vcombine_s64 (val
.val
[2], vcreate_s64 (INT64_C (0)));
24959 temp
.val
[3] = vcombine_s64 (val
.val
[3], vcreate_s64 (INT64_C (0)));
24960 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24961 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24962 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24963 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[3], 3);
24964 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24967 __extension__
static __inline
void
24968 vst4_u64 (uint64_t * __a
, uint64x1x4_t val
)
24970 __builtin_aarch64_simd_xi __o
;
24972 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24973 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24974 temp
.val
[2] = vcombine_u64 (val
.val
[2], vcreate_u64 (UINT64_C (0)));
24975 temp
.val
[3] = vcombine_u64 (val
.val
[3], vcreate_u64 (UINT64_C (0)));
24976 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24977 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24978 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24979 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[3], 3);
24980 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24983 __extension__
static __inline
void
24984 vst4_f64 (float64_t
* __a
, float64x1x4_t val
)
24986 __builtin_aarch64_simd_xi __o
;
24987 float64x2x4_t temp
;
24988 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24989 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24990 temp
.val
[2] = vcombine_f64 (val
.val
[2], vcreate_f64 (UINT64_C (0)));
24991 temp
.val
[3] = vcombine_f64 (val
.val
[3], vcreate_f64 (UINT64_C (0)));
24992 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24993 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24994 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[2], 2);
24995 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[3], 3);
24996 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24999 __extension__
static __inline
void
25000 vst4_s8 (int8_t * __a
, int8x8x4_t val
)
25002 __builtin_aarch64_simd_xi __o
;
25004 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
25005 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
25006 temp
.val
[2] = vcombine_s8 (val
.val
[2], vcreate_s8 (INT64_C (0)));
25007 temp
.val
[3] = vcombine_s8 (val
.val
[3], vcreate_s8 (INT64_C (0)));
25008 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
25009 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
25010 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
25011 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
25012 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25015 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25016 vst4_p8 (poly8_t
* __a
, poly8x8x4_t val
)
25018 __builtin_aarch64_simd_xi __o
;
25020 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
25021 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
25022 temp
.val
[2] = vcombine_p8 (val
.val
[2], vcreate_p8 (UINT64_C (0)));
25023 temp
.val
[3] = vcombine_p8 (val
.val
[3], vcreate_p8 (UINT64_C (0)));
25024 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
25025 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
25026 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
25027 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
25028 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25031 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25032 vst4_s16 (int16_t * __a
, int16x4x4_t val
)
25034 __builtin_aarch64_simd_xi __o
;
25036 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
25037 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
25038 temp
.val
[2] = vcombine_s16 (val
.val
[2], vcreate_s16 (INT64_C (0)));
25039 temp
.val
[3] = vcombine_s16 (val
.val
[3], vcreate_s16 (INT64_C (0)));
25040 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
25041 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
25042 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
25043 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
25044 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25047 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25048 vst4_p16 (poly16_t
* __a
, poly16x4x4_t val
)
25050 __builtin_aarch64_simd_xi __o
;
25052 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
25053 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
25054 temp
.val
[2] = vcombine_p16 (val
.val
[2], vcreate_p16 (UINT64_C (0)));
25055 temp
.val
[3] = vcombine_p16 (val
.val
[3], vcreate_p16 (UINT64_C (0)));
25056 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
25057 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
25058 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
25059 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
25060 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25063 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25064 vst4_s32 (int32_t * __a
, int32x2x4_t val
)
25066 __builtin_aarch64_simd_xi __o
;
25068 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
25069 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
25070 temp
.val
[2] = vcombine_s32 (val
.val
[2], vcreate_s32 (INT64_C (0)));
25071 temp
.val
[3] = vcombine_s32 (val
.val
[3], vcreate_s32 (INT64_C (0)));
25072 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
25073 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
25074 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
25075 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[3], 3);
25076 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25079 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25080 vst4_u8 (uint8_t * __a
, uint8x8x4_t val
)
25082 __builtin_aarch64_simd_xi __o
;
25084 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
25085 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
25086 temp
.val
[2] = vcombine_u8 (val
.val
[2], vcreate_u8 (UINT64_C (0)));
25087 temp
.val
[3] = vcombine_u8 (val
.val
[3], vcreate_u8 (UINT64_C (0)));
25088 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
25089 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
25090 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
25091 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
25092 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25095 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25096 vst4_u16 (uint16_t * __a
, uint16x4x4_t val
)
25098 __builtin_aarch64_simd_xi __o
;
25100 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
25101 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
25102 temp
.val
[2] = vcombine_u16 (val
.val
[2], vcreate_u16 (UINT64_C (0)));
25103 temp
.val
[3] = vcombine_u16 (val
.val
[3], vcreate_u16 (UINT64_C (0)));
25104 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
25105 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
25106 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
25107 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
25108 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25111 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25112 vst4_u32 (uint32_t * __a
, uint32x2x4_t val
)
25114 __builtin_aarch64_simd_xi __o
;
25116 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
25117 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
25118 temp
.val
[2] = vcombine_u32 (val
.val
[2], vcreate_u32 (UINT64_C (0)));
25119 temp
.val
[3] = vcombine_u32 (val
.val
[3], vcreate_u32 (UINT64_C (0)));
25120 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
25121 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
25122 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
25123 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[3], 3);
25124 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25127 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25128 vst4_f32 (float32_t
* __a
, float32x2x4_t val
)
25130 __builtin_aarch64_simd_xi __o
;
25131 float32x4x4_t temp
;
25132 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
25133 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
25134 temp
.val
[2] = vcombine_f32 (val
.val
[2], vcreate_f32 (UINT64_C (0)));
25135 temp
.val
[3] = vcombine_f32 (val
.val
[3], vcreate_f32 (UINT64_C (0)));
25136 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
25137 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
25138 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[2], 2);
25139 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[3], 3);
25140 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
25143 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25144 vst4q_s8 (int8_t * __a
, int8x16x4_t val
)
25146 __builtin_aarch64_simd_xi __o
;
25147 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25148 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25149 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25150 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25151 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25154 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25155 vst4q_p8 (poly8_t
* __a
, poly8x16x4_t val
)
25157 __builtin_aarch64_simd_xi __o
;
25158 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25159 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25160 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25161 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25162 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25165 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25166 vst4q_s16 (int16_t * __a
, int16x8x4_t val
)
25168 __builtin_aarch64_simd_xi __o
;
25169 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25170 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25171 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25172 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25173 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25176 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25177 vst4q_p16 (poly16_t
* __a
, poly16x8x4_t val
)
25179 __builtin_aarch64_simd_xi __o
;
25180 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25181 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25182 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25183 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25184 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25187 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25188 vst4q_s32 (int32_t * __a
, int32x4x4_t val
)
25190 __builtin_aarch64_simd_xi __o
;
25191 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
25192 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
25193 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[2], 2);
25194 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[3], 3);
25195 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25198 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25199 vst4q_s64 (int64_t * __a
, int64x2x4_t val
)
25201 __builtin_aarch64_simd_xi __o
;
25202 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
25203 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
25204 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[2], 2);
25205 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[3], 3);
25206 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
25209 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25210 vst4q_u8 (uint8_t * __a
, uint8x16x4_t val
)
25212 __builtin_aarch64_simd_xi __o
;
25213 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25214 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25215 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25216 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25217 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25220 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25221 vst4q_u16 (uint16_t * __a
, uint16x8x4_t val
)
25223 __builtin_aarch64_simd_xi __o
;
25224 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25225 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25226 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25227 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25228 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25231 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25232 vst4q_u32 (uint32_t * __a
, uint32x4x4_t val
)
25234 __builtin_aarch64_simd_xi __o
;
25235 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
25236 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
25237 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[2], 2);
25238 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[3], 3);
25239 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25242 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25243 vst4q_u64 (uint64_t * __a
, uint64x2x4_t val
)
25245 __builtin_aarch64_simd_xi __o
;
25246 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
25247 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
25248 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[2], 2);
25249 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[3], 3);
25250 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
25253 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25254 vst4q_f32 (float32_t
* __a
, float32x4x4_t val
)
25256 __builtin_aarch64_simd_xi __o
;
25257 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
25258 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
25259 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[2], 2);
25260 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[3], 3);
25261 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
25264 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25265 vst4q_f64 (float64_t
* __a
, float64x2x4_t val
)
25267 __builtin_aarch64_simd_xi __o
;
25268 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[0], 0);
25269 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[1], 1);
25270 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[2], 2);
25271 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[3], 3);
25272 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
/* vsubd_s64: takes two int64x1_t operands and returns an int64x1_t.
   NOTE(review): no function body is visible in this view of the file, so
   the computation is not documented here -- presumably a 64-bit
   subtraction going by the name; confirm against the full source.  */
25277 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25278 vsubd_s64 (int64x1_t __a
, int64x1_t __b
)
/* vsubd_u64: takes two uint64x1_t operands and returns a uint64x1_t.
   NOTE(review): no function body is visible in this view of the file, so
   the computation is not documented here -- presumably a 64-bit
   subtraction going by the name; confirm against the full source.  */
25283 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25284 vsubd_u64 (uint64x1_t __a
, uint64x1_t __b
)
25291 __extension__
static __inline float32x2x2_t
__attribute__ ((__always_inline__
))
25292 vtrn_f32 (float32x2_t a
, float32x2_t b
)
25294 return (float32x2x2_t
) {vtrn1_f32 (a
, b
), vtrn2_f32 (a
, b
)};
25297 __extension__
static __inline poly8x8x2_t
__attribute__ ((__always_inline__
))
25298 vtrn_p8 (poly8x8_t a
, poly8x8_t b
)
25300 return (poly8x8x2_t
) {vtrn1_p8 (a
, b
), vtrn2_p8 (a
, b
)};
25303 __extension__
static __inline poly16x4x2_t
__attribute__ ((__always_inline__
))
25304 vtrn_p16 (poly16x4_t a
, poly16x4_t b
)
25306 return (poly16x4x2_t
) {vtrn1_p16 (a
, b
), vtrn2_p16 (a
, b
)};
25309 __extension__
static __inline int8x8x2_t
__attribute__ ((__always_inline__
))
25310 vtrn_s8 (int8x8_t a
, int8x8_t b
)
25312 return (int8x8x2_t
) {vtrn1_s8 (a
, b
), vtrn2_s8 (a
, b
)};
25315 __extension__
static __inline int16x4x2_t
__attribute__ ((__always_inline__
))
25316 vtrn_s16 (int16x4_t a
, int16x4_t b
)
25318 return (int16x4x2_t
) {vtrn1_s16 (a
, b
), vtrn2_s16 (a
, b
)};
25321 __extension__
static __inline int32x2x2_t
__attribute__ ((__always_inline__
))
25322 vtrn_s32 (int32x2_t a
, int32x2_t b
)
25324 return (int32x2x2_t
) {vtrn1_s32 (a
, b
), vtrn2_s32 (a
, b
)};
25327 __extension__
static __inline uint8x8x2_t
__attribute__ ((__always_inline__
))
25328 vtrn_u8 (uint8x8_t a
, uint8x8_t b
)
25330 return (uint8x8x2_t
) {vtrn1_u8 (a
, b
), vtrn2_u8 (a
, b
)};
25333 __extension__
static __inline uint16x4x2_t
__attribute__ ((__always_inline__
))
25334 vtrn_u16 (uint16x4_t a
, uint16x4_t b
)
25336 return (uint16x4x2_t
) {vtrn1_u16 (a
, b
), vtrn2_u16 (a
, b
)};
25339 __extension__
static __inline uint32x2x2_t
__attribute__ ((__always_inline__
))
25340 vtrn_u32 (uint32x2_t a
, uint32x2_t b
)
25342 return (uint32x2x2_t
) {vtrn1_u32 (a
, b
), vtrn2_u32 (a
, b
)};
25345 __extension__
static __inline float32x4x2_t
__attribute__ ((__always_inline__
))
25346 vtrnq_f32 (float32x4_t a
, float32x4_t b
)
25348 return (float32x4x2_t
) {vtrn1q_f32 (a
, b
), vtrn2q_f32 (a
, b
)};
25351 __extension__
static __inline poly8x16x2_t
__attribute__ ((__always_inline__
))
25352 vtrnq_p8 (poly8x16_t a
, poly8x16_t b
)
25354 return (poly8x16x2_t
) {vtrn1q_p8 (a
, b
), vtrn2q_p8 (a
, b
)};
25357 __extension__
static __inline poly16x8x2_t
__attribute__ ((__always_inline__
))
25358 vtrnq_p16 (poly16x8_t a
, poly16x8_t b
)
25360 return (poly16x8x2_t
) {vtrn1q_p16 (a
, b
), vtrn2q_p16 (a
, b
)};
25363 __extension__
static __inline int8x16x2_t
__attribute__ ((__always_inline__
))
25364 vtrnq_s8 (int8x16_t a
, int8x16_t b
)
25366 return (int8x16x2_t
) {vtrn1q_s8 (a
, b
), vtrn2q_s8 (a
, b
)};
25369 __extension__
static __inline int16x8x2_t
__attribute__ ((__always_inline__
))
25370 vtrnq_s16 (int16x8_t a
, int16x8_t b
)
25372 return (int16x8x2_t
) {vtrn1q_s16 (a
, b
), vtrn2q_s16 (a
, b
)};
25375 __extension__
static __inline int32x4x2_t
__attribute__ ((__always_inline__
))
25376 vtrnq_s32 (int32x4_t a
, int32x4_t b
)
25378 return (int32x4x2_t
) {vtrn1q_s32 (a
, b
), vtrn2q_s32 (a
, b
)};
25381 __extension__
static __inline uint8x16x2_t
__attribute__ ((__always_inline__
))
25382 vtrnq_u8 (uint8x16_t a
, uint8x16_t b
)
25384 return (uint8x16x2_t
) {vtrn1q_u8 (a
, b
), vtrn2q_u8 (a
, b
)};
25387 __extension__
static __inline uint16x8x2_t
__attribute__ ((__always_inline__
))
25388 vtrnq_u16 (uint16x8_t a
, uint16x8_t b
)
25390 return (uint16x8x2_t
) {vtrn1q_u16 (a
, b
), vtrn2q_u16 (a
, b
)};
25393 __extension__
static __inline uint32x4x2_t
__attribute__ ((__always_inline__
))
25394 vtrnq_u32 (uint32x4_t a
, uint32x4_t b
)
25396 return (uint32x4x2_t
) {vtrn1q_u32 (a
, b
), vtrn2q_u32 (a
, b
)};
25401 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
25402 vtst_s8 (int8x8_t __a
, int8x8_t __b
)
25404 return (uint8x8_t
) __builtin_aarch64_cmtstv8qi (__a
, __b
);
25407 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
25408 vtst_s16 (int16x4_t __a
, int16x4_t __b
)
25410 return (uint16x4_t
) __builtin_aarch64_cmtstv4hi (__a
, __b
);
25413 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
25414 vtst_s32 (int32x2_t __a
, int32x2_t __b
)
25416 return (uint32x2_t
) __builtin_aarch64_cmtstv2si (__a
, __b
);
25419 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25420 vtst_s64 (int64x1_t __a
, int64x1_t __b
)
25422 return (__a
& __b
) ? -1ll : 0ll;
/* vtst_u8: calls the cmtstv8qi builtin with __a cast to int8x8_t and casts
   the builtin's result to uint8x8_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25425 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
25426 vtst_u8 (uint8x8_t __a
, uint8x8_t __b
)
25428 return (uint8x8_t
) __builtin_aarch64_cmtstv8qi ((int8x8_t
) __a
,
/* vtst_u16: calls the cmtstv4hi builtin with __a cast to int16x4_t and
   casts the builtin's result to uint16x4_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25432 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
25433 vtst_u16 (uint16x4_t __a
, uint16x4_t __b
)
25435 return (uint16x4_t
) __builtin_aarch64_cmtstv4hi ((int16x4_t
) __a
,
/* vtst_u32: calls the cmtstv2si builtin with __a cast to int32x2_t and
   casts the builtin's result to uint32x2_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25439 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
25440 vtst_u32 (uint32x2_t __a
, uint32x2_t __b
)
25442 return (uint32x2_t
) __builtin_aarch64_cmtstv2si ((int32x2_t
) __a
,
25446 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25447 vtst_u64 (uint64x1_t __a
, uint64x1_t __b
)
25449 return (__a
& __b
) ? -1ll : 0ll;
25452 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
25453 vtstq_s8 (int8x16_t __a
, int8x16_t __b
)
25455 return (uint8x16_t
) __builtin_aarch64_cmtstv16qi (__a
, __b
);
25458 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
25459 vtstq_s16 (int16x8_t __a
, int16x8_t __b
)
25461 return (uint16x8_t
) __builtin_aarch64_cmtstv8hi (__a
, __b
);
25464 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
25465 vtstq_s32 (int32x4_t __a
, int32x4_t __b
)
25467 return (uint32x4_t
) __builtin_aarch64_cmtstv4si (__a
, __b
);
25470 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
25471 vtstq_s64 (int64x2_t __a
, int64x2_t __b
)
25473 return (uint64x2_t
) __builtin_aarch64_cmtstv2di (__a
, __b
);
/* vtstq_u8: calls the cmtstv16qi builtin with __a cast to int8x16_t and
   casts the builtin's result to uint8x16_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25476 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
25477 vtstq_u8 (uint8x16_t __a
, uint8x16_t __b
)
25479 return (uint8x16_t
) __builtin_aarch64_cmtstv16qi ((int8x16_t
) __a
,
/* vtstq_u16: calls the cmtstv8hi builtin with __a cast to int16x8_t and
   casts the builtin's result to uint16x8_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25483 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
25484 vtstq_u16 (uint16x8_t __a
, uint16x8_t __b
)
25486 return (uint16x8_t
) __builtin_aarch64_cmtstv8hi ((int16x8_t
) __a
,
/* vtstq_u32: calls the cmtstv4si builtin with __a cast to int32x4_t and
   casts the builtin's result to uint32x4_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25490 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
25491 vtstq_u32 (uint32x4_t __a
, uint32x4_t __b
)
25493 return (uint32x4_t
) __builtin_aarch64_cmtstv4si ((int32x4_t
) __a
,
/* vtstq_u64: calls the cmtstv2di builtin with __a cast to int64x2_t and
   casts the builtin's result to uint64x2_t.
   NOTE(review): the rest of the call (remaining argument and closing
   parenthesis) is not visible in this view of the file; presumably __b is
   passed the same way -- confirm against the full source.  */
25497 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
25498 vtstq_u64 (uint64x2_t __a
, uint64x2_t __b
)
25500 return (uint64x2_t
) __builtin_aarch64_cmtstv2di ((int64x2_t
) __a
,
25504 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25505 vtstd_s64 (int64x1_t __a
, int64x1_t __b
)
25507 return (__a
& __b
) ? -1ll : 0ll;
25510 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25511 vtstd_u64 (uint64x1_t __a
, uint64x1_t __b
)
25513 return (__a
& __b
) ? -1ll : 0ll;
25518 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
25519 vuqadd_s8 (int8x8_t __a
, uint8x8_t __b
)
25521 return (int8x8_t
) __builtin_aarch64_suqaddv8qi (__a
, (int8x8_t
) __b
);
25524 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
25525 vuqadd_s16 (int16x4_t __a
, uint16x4_t __b
)
25527 return (int16x4_t
) __builtin_aarch64_suqaddv4hi (__a
, (int16x4_t
) __b
);
25530 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
25531 vuqadd_s32 (int32x2_t __a
, uint32x2_t __b
)
25533 return (int32x2_t
) __builtin_aarch64_suqaddv2si (__a
, (int32x2_t
) __b
);
25536 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25537 vuqadd_s64 (int64x1_t __a
, uint64x1_t __b
)
25539 return (int64x1_t
) __builtin_aarch64_suqadddi (__a
, (int64x1_t
) __b
);
25542 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
25543 vuqaddq_s8 (int8x16_t __a
, uint8x16_t __b
)
25545 return (int8x16_t
) __builtin_aarch64_suqaddv16qi (__a
, (int8x16_t
) __b
);
25548 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
25549 vuqaddq_s16 (int16x8_t __a
, uint16x8_t __b
)
25551 return (int16x8_t
) __builtin_aarch64_suqaddv8hi (__a
, (int16x8_t
) __b
);
25554 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
25555 vuqaddq_s32 (int32x4_t __a
, uint32x4_t __b
)
25557 return (int32x4_t
) __builtin_aarch64_suqaddv4si (__a
, (int32x4_t
) __b
);
25560 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
25561 vuqaddq_s64 (int64x2_t __a
, uint64x2_t __b
)
25563 return (int64x2_t
) __builtin_aarch64_suqaddv2di (__a
, (int64x2_t
) __b
);
25566 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
25567 vuqaddb_s8 (int8x1_t __a
, uint8x1_t __b
)
25569 return (int8x1_t
) __builtin_aarch64_suqaddqi (__a
, (int8x1_t
) __b
);
25572 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
25573 vuqaddh_s16 (int16x1_t __a
, uint16x1_t __b
)
25575 return (int16x1_t
) __builtin_aarch64_suqaddhi (__a
, (int16x1_t
) __b
);
25578 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
25579 vuqadds_s32 (int32x1_t __a
, uint32x1_t __b
)
25581 return (int32x1_t
) __builtin_aarch64_suqaddsi (__a
, (int32x1_t
) __b
);
25584 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25585 vuqaddd_s64 (int64x1_t __a
, uint64x1_t __b
)
25587 return (int64x1_t
) __builtin_aarch64_suqadddi (__a
, (int64x1_t
) __b
);
/* Generate the always-inline function v<op><Q>_<funcsuffix> (a, b).  It
   returns a RETTYPE initialised, in order, with
   v<op>1<Q>_<funcsuffix> (a, b) and v<op>2<Q>_<funcsuffix> (a, b).
   INTYPE is the type of both operands.  The final line carries no
   continuation backslash so the definition ends with the function's
   closing brace.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)             \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)                  \
  {                                                                     \
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),      \
                      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};     \
  }
/* Expand __DEFINTERLEAVE once per supported element type for operation OP:
   nine variants with an empty Q argument followed by nine with Q = q.  */
#define __INTERLEAVE_LIST(op)                                   \
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)        \
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)             \
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)          \
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)               \
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)            \
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)            \
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)             \
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)          \
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)          \
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)      \
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)         \
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)        \
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)           \
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)          \
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)          \
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)         \
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)        \
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25621 __INTERLEAVE_LIST (uzp
)
25625 __INTERLEAVE_LIST (zip
)
25627 #undef __INTERLEAVE_LIST
25628 #undef __DEFINTERLEAVE
25630 /* End of optimal implementations in approved order. */
/* Remove the __aarch64_vget_lane_* and __aarch64_vgetq_lane_* macros.
   NOTE(review): their definitions are outside this view; presumably they
   are header-internal helpers that must not remain defined for code that
   includes this file -- confirm against the full source.  */
25632 #undef __aarch64_vget_lane_any
25633 #undef __aarch64_vget_lane_f32
25634 #undef __aarch64_vget_lane_f64
25635 #undef __aarch64_vget_lane_p8
25636 #undef __aarch64_vget_lane_p16
25637 #undef __aarch64_vget_lane_s8
25638 #undef __aarch64_vget_lane_s16
25639 #undef __aarch64_vget_lane_s32
25640 #undef __aarch64_vget_lane_s64
25641 #undef __aarch64_vget_lane_u8
25642 #undef __aarch64_vget_lane_u16
25643 #undef __aarch64_vget_lane_u32
25644 #undef __aarch64_vget_lane_u64
25646 #undef __aarch64_vgetq_lane_f32
25647 #undef __aarch64_vgetq_lane_f64
25648 #undef __aarch64_vgetq_lane_p8
25649 #undef __aarch64_vgetq_lane_p16
25650 #undef __aarch64_vgetq_lane_s8
25651 #undef __aarch64_vgetq_lane_s16
25652 #undef __aarch64_vgetq_lane_s32
25653 #undef __aarch64_vgetq_lane_s64
25654 #undef __aarch64_vgetq_lane_u8
25655 #undef __aarch64_vgetq_lane_u16
25656 #undef __aarch64_vgetq_lane_u32
25657 #undef __aarch64_vgetq_lane_u64