1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
/* 8-byte vector types.  Every typedef in this group that carries
   __attribute__ ((__vector_size__ (8))) is a vector built on one of the
   __builtin_aarch64_simd_* element types.  */
32 typedef __builtin_aarch64_simd_qi int8x8_t
33 __attribute__ ((__vector_size__ (8)));
34 typedef __builtin_aarch64_simd_hi int16x4_t
35 __attribute__ ((__vector_size__ (8)));
36 typedef __builtin_aarch64_simd_si int32x2_t
37 __attribute__ ((__vector_size__ (8)));
/* The signed x1 types and float64x1_t carry no __vector_size__ attribute
   here: they are plain aliases of the scalar types named on the left.
   NOTE(review): int64_t and friends are not declared in the visible part
   of this file; presumably <stdint.h> is included on a line not shown --
   confirm.  */
38 typedef int64_t int64x1_t
;
39 typedef int32_t int32x1_t
;
40 typedef int16_t int16x1_t
;
41 typedef int8_t int8x1_t
;
42 typedef double float64x1_t
;
/* Floating-point, polynomial and unsigned 8-byte vectors.  */
43 typedef __builtin_aarch64_simd_sf float32x2_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_poly8 poly8x8_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly16 poly16x4_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_uqi uint8x8_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uhi uint16x4_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_usi uint32x2_t
54 __attribute__ ((__vector_size__ (8)));
/* The unsigned x1 types are likewise plain aliases of the fixed-width
   unsigned scalar types, with no vector attribute.  */
55 typedef uint64_t uint64x1_t
;
56 typedef uint32_t uint32x1_t
;
57 typedef uint16_t uint16x1_t
;
58 typedef uint8_t uint8x1_t
;
/* 16-byte vector types.  Each typedef below applies
   __attribute__ ((__vector_size__ (16))) to one of the
   __builtin_aarch64_simd_* element types; unlike the 8-byte group, the
   64-bit integer and double-precision variants are vectors here too
   (built on the _di, _udi and _df element types).  */
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_uqi uint8x16_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uhi uint16x8_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_usi uint32x4_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_udi uint64x2_t
82 __attribute__ ((__vector_size__ (16)));
/* Scalar element type names.  float32_t and float64_t alias the C types
   float and double; poly8_t and poly16_t alias the same
   __builtin_aarch64_simd_poly8/poly16 types that the poly vector
   typedefs are built on.  */
84 typedef float float32_t
;
85 typedef double float64_t
;
86 typedef __builtin_aarch64_simd_poly8 poly8_t
;
87 typedef __builtin_aarch64_simd_poly16 poly16_t
;
89 typedef struct int8x8x2_t
94 typedef struct int8x16x2_t
99 typedef struct int16x4x2_t
104 typedef struct int16x8x2_t
109 typedef struct int32x2x2_t
114 typedef struct int32x4x2_t
119 typedef struct int64x1x2_t
124 typedef struct int64x2x2_t
129 typedef struct uint8x8x2_t
134 typedef struct uint8x16x2_t
139 typedef struct uint16x4x2_t
144 typedef struct uint16x8x2_t
149 typedef struct uint32x2x2_t
154 typedef struct uint32x4x2_t
159 typedef struct uint64x1x2_t
164 typedef struct uint64x2x2_t
169 typedef struct float32x2x2_t
174 typedef struct float32x4x2_t
179 typedef struct float64x2x2_t
184 typedef struct float64x1x2_t
189 typedef struct poly8x8x2_t
194 typedef struct poly8x16x2_t
199 typedef struct poly16x4x2_t
204 typedef struct poly16x8x2_t
209 typedef struct int8x8x3_t
214 typedef struct int8x16x3_t
219 typedef struct int16x4x3_t
224 typedef struct int16x8x3_t
229 typedef struct int32x2x3_t
234 typedef struct int32x4x3_t
239 typedef struct int64x1x3_t
244 typedef struct int64x2x3_t
249 typedef struct uint8x8x3_t
254 typedef struct uint8x16x3_t
259 typedef struct uint16x4x3_t
264 typedef struct uint16x8x3_t
269 typedef struct uint32x2x3_t
274 typedef struct uint32x4x3_t
279 typedef struct uint64x1x3_t
284 typedef struct uint64x2x3_t
289 typedef struct float32x2x3_t
294 typedef struct float32x4x3_t
299 typedef struct float64x2x3_t
304 typedef struct float64x1x3_t
309 typedef struct poly8x8x3_t
314 typedef struct poly8x16x3_t
319 typedef struct poly16x4x3_t
324 typedef struct poly16x8x3_t
329 typedef struct int8x8x4_t
334 typedef struct int8x16x4_t
339 typedef struct int16x4x4_t
344 typedef struct int16x8x4_t
349 typedef struct int32x2x4_t
354 typedef struct int32x4x4_t
359 typedef struct int64x1x4_t
364 typedef struct int64x2x4_t
369 typedef struct uint8x8x4_t
374 typedef struct uint8x16x4_t
379 typedef struct uint16x4x4_t
384 typedef struct uint16x8x4_t
389 typedef struct uint32x2x4_t
394 typedef struct uint32x4x4_t
399 typedef struct uint64x1x4_t
404 typedef struct uint64x2x4_t
409 typedef struct float32x2x4_t
414 typedef struct float32x4x4_t
419 typedef struct float64x2x4_t
424 typedef struct float64x1x4_t
429 typedef struct poly8x8x4_t
434 typedef struct poly8x16x4_t
439 typedef struct poly16x4x4_t
444 typedef struct poly16x8x4_t
450 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
451 vadd_s8 (int8x8_t __a
, int8x8_t __b
)
456 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
457 vadd_s16 (int16x4_t __a
, int16x4_t __b
)
462 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
463 vadd_s32 (int32x2_t __a
, int32x2_t __b
)
468 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
469 vadd_f32 (float32x2_t __a
, float32x2_t __b
)
474 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
475 vadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
480 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
481 vadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
486 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
487 vadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
492 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
493 vadd_s64 (int64x1_t __a
, int64x1_t __b
)
498 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
499 vadd_u64 (uint64x1_t __a
, uint64x1_t __b
)
504 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
505 vaddq_s8 (int8x16_t __a
, int8x16_t __b
)
510 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
511 vaddq_s16 (int16x8_t __a
, int16x8_t __b
)
516 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
517 vaddq_s32 (int32x4_t __a
, int32x4_t __b
)
522 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
523 vaddq_s64 (int64x2_t __a
, int64x2_t __b
)
528 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
529 vaddq_f32 (float32x4_t __a
, float32x4_t __b
)
534 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
535 vaddq_f64 (float64x2_t __a
, float64x2_t __b
)
540 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
541 vaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
546 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
547 vaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
552 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
553 vaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
558 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
559 vaddq_u64 (uint64x2_t __a
, uint64x2_t __b
)
/* vaddl_s8: forwards __a and __b (both int8x8_t) unchanged to
   __builtin_aarch64_saddlv8qi and returns the result cast to int16x8_t.  */
564 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
565 vaddl_s8 (int8x8_t __a
, int8x8_t __b
)
567 return (int16x8_t
) __builtin_aarch64_saddlv8qi (__a
, __b
);
/* vaddl_s16: forwards __a and __b (both int16x4_t) unchanged to
   __builtin_aarch64_saddlv4hi and returns the result cast to int32x4_t.  */
570 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
571 vaddl_s16 (int16x4_t __a
, int16x4_t __b
)
573 return (int32x4_t
) __builtin_aarch64_saddlv4hi (__a
, __b
);
/* vaddl_s32: forwards __a and __b (both int32x2_t) unchanged to
   __builtin_aarch64_saddlv2si and returns the result cast to int64x2_t.  */
576 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
577 vaddl_s32 (int32x2_t __a
, int32x2_t __b
)
579 return (int64x2_t
) __builtin_aarch64_saddlv2si (__a
, __b
);
582 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
583 vaddl_u8 (uint8x8_t __a
, uint8x8_t __b
)
585 return (uint16x8_t
) __builtin_aarch64_uaddlv8qi ((int8x8_t
) __a
,
589 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
590 vaddl_u16 (uint16x4_t __a
, uint16x4_t __b
)
592 return (uint32x4_t
) __builtin_aarch64_uaddlv4hi ((int16x4_t
) __a
,
596 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
597 vaddl_u32 (uint32x2_t __a
, uint32x2_t __b
)
599 return (uint64x2_t
) __builtin_aarch64_uaddlv2si ((int32x2_t
) __a
,
/* vaddl_high_s8: forwards __a and __b (both int8x16_t) unchanged to
   __builtin_aarch64_saddl2v16qi and returns the result cast to
   int16x8_t.  */
603 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
604 vaddl_high_s8 (int8x16_t __a
, int8x16_t __b
)
606 return (int16x8_t
) __builtin_aarch64_saddl2v16qi (__a
, __b
);
/* vaddl_high_s16: forwards __a and __b (both int16x8_t) unchanged to
   __builtin_aarch64_saddl2v8hi and returns the result cast to
   int32x4_t.  */
609 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
610 vaddl_high_s16 (int16x8_t __a
, int16x8_t __b
)
612 return (int32x4_t
) __builtin_aarch64_saddl2v8hi (__a
, __b
);
/* vaddl_high_s32: forwards __a and __b (both int32x4_t) unchanged to
   __builtin_aarch64_saddl2v4si and returns the result cast to
   int64x2_t.  */
615 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
616 vaddl_high_s32 (int32x4_t __a
, int32x4_t __b
)
618 return (int64x2_t
) __builtin_aarch64_saddl2v4si (__a
, __b
);
621 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
622 vaddl_high_u8 (uint8x16_t __a
, uint8x16_t __b
)
624 return (uint16x8_t
) __builtin_aarch64_uaddl2v16qi ((int8x16_t
) __a
,
628 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
629 vaddl_high_u16 (uint16x8_t __a
, uint16x8_t __b
)
631 return (uint32x4_t
) __builtin_aarch64_uaddl2v8hi ((int16x8_t
) __a
,
635 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
636 vaddl_high_u32 (uint32x4_t __a
, uint32x4_t __b
)
638 return (uint64x2_t
) __builtin_aarch64_uaddl2v4si ((int32x4_t
) __a
,
/* vaddw_s8: forwards __a (int16x8_t) and __b (int8x8_t) unchanged to
   __builtin_aarch64_saddwv8qi and returns the result cast to int16x8_t.
   Note the operands have different vector types.  */
642 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
643 vaddw_s8 (int16x8_t __a
, int8x8_t __b
)
645 return (int16x8_t
) __builtin_aarch64_saddwv8qi (__a
, __b
);
/* vaddw_s16: forwards __a (int32x4_t) and __b (int16x4_t) unchanged to
   __builtin_aarch64_saddwv4hi and returns the result cast to int32x4_t.
   Note the operands have different vector types.  */
648 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
649 vaddw_s16 (int32x4_t __a
, int16x4_t __b
)
651 return (int32x4_t
) __builtin_aarch64_saddwv4hi (__a
, __b
);
/* vaddw_s32: forwards __a (int64x2_t) and __b (int32x2_t) unchanged to
   __builtin_aarch64_saddwv2si and returns the result cast to int64x2_t.
   Note the operands have different vector types.  */
654 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
655 vaddw_s32 (int64x2_t __a
, int32x2_t __b
)
657 return (int64x2_t
) __builtin_aarch64_saddwv2si (__a
, __b
);
660 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
661 vaddw_u8 (uint16x8_t __a
, uint8x8_t __b
)
663 return (uint16x8_t
) __builtin_aarch64_uaddwv8qi ((int16x8_t
) __a
,
667 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
668 vaddw_u16 (uint32x4_t __a
, uint16x4_t __b
)
670 return (uint32x4_t
) __builtin_aarch64_uaddwv4hi ((int32x4_t
) __a
,
674 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
675 vaddw_u32 (uint64x2_t __a
, uint32x2_t __b
)
677 return (uint64x2_t
) __builtin_aarch64_uaddwv2si ((int64x2_t
) __a
,
/* vaddw_high_s8: forwards __a (int16x8_t) and __b (int8x16_t) unchanged
   to __builtin_aarch64_saddw2v16qi and returns the result cast to
   int16x8_t.  */
681 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
682 vaddw_high_s8 (int16x8_t __a
, int8x16_t __b
)
684 return (int16x8_t
) __builtin_aarch64_saddw2v16qi (__a
, __b
);
/* vaddw_high_s16: forwards __a (int32x4_t) and __b (int16x8_t) unchanged
   to __builtin_aarch64_saddw2v8hi and returns the result cast to
   int32x4_t.  */
687 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
688 vaddw_high_s16 (int32x4_t __a
, int16x8_t __b
)
690 return (int32x4_t
) __builtin_aarch64_saddw2v8hi (__a
, __b
);
/* vaddw_high_s32: forwards __a (int64x2_t) and __b (int32x4_t) unchanged
   to __builtin_aarch64_saddw2v4si and returns the result cast to
   int64x2_t.  */
693 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
694 vaddw_high_s32 (int64x2_t __a
, int32x4_t __b
)
696 return (int64x2_t
) __builtin_aarch64_saddw2v4si (__a
, __b
);
699 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
700 vaddw_high_u8 (uint16x8_t __a
, uint8x16_t __b
)
702 return (uint16x8_t
) __builtin_aarch64_uaddw2v16qi ((int16x8_t
) __a
,
706 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
707 vaddw_high_u16 (uint32x4_t __a
, uint16x8_t __b
)
709 return (uint32x4_t
) __builtin_aarch64_uaddw2v8hi ((int32x4_t
) __a
,
713 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
714 vaddw_high_u32 (uint64x2_t __a
, uint32x4_t __b
)
716 return (uint64x2_t
) __builtin_aarch64_uaddw2v4si ((int64x2_t
) __a
,
/* vhadd_s8: forwards __a and __b (both int8x8_t) unchanged to
   __builtin_aarch64_shaddv8qi and returns the result cast to int8x8_t.  */
720 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
721 vhadd_s8 (int8x8_t __a
, int8x8_t __b
)
723 return (int8x8_t
) __builtin_aarch64_shaddv8qi (__a
, __b
);
/* vhadd_s16: forwards __a and __b (both int16x4_t) unchanged to
   __builtin_aarch64_shaddv4hi and returns the result cast to int16x4_t.  */
726 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
727 vhadd_s16 (int16x4_t __a
, int16x4_t __b
)
729 return (int16x4_t
) __builtin_aarch64_shaddv4hi (__a
, __b
);
/* vhadd_s32: forwards __a and __b (both int32x2_t) unchanged to
   __builtin_aarch64_shaddv2si and returns the result cast to int32x2_t.  */
732 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
733 vhadd_s32 (int32x2_t __a
, int32x2_t __b
)
735 return (int32x2_t
) __builtin_aarch64_shaddv2si (__a
, __b
);
738 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
739 vhadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
741 return (uint8x8_t
) __builtin_aarch64_uhaddv8qi ((int8x8_t
) __a
,
745 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
746 vhadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
748 return (uint16x4_t
) __builtin_aarch64_uhaddv4hi ((int16x4_t
) __a
,
752 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
753 vhadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
755 return (uint32x2_t
) __builtin_aarch64_uhaddv2si ((int32x2_t
) __a
,
/* vhaddq_s8: forwards __a and __b (both int8x16_t) unchanged to
   __builtin_aarch64_shaddv16qi and returns the result cast to
   int8x16_t.  */
759 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
760 vhaddq_s8 (int8x16_t __a
, int8x16_t __b
)
762 return (int8x16_t
) __builtin_aarch64_shaddv16qi (__a
, __b
);
/* vhaddq_s16: forwards __a and __b (both int16x8_t) unchanged to
   __builtin_aarch64_shaddv8hi and returns the result cast to int16x8_t.  */
765 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
766 vhaddq_s16 (int16x8_t __a
, int16x8_t __b
)
768 return (int16x8_t
) __builtin_aarch64_shaddv8hi (__a
, __b
);
/* vhaddq_s32: forwards __a and __b (both int32x4_t) unchanged to
   __builtin_aarch64_shaddv4si and returns the result cast to int32x4_t.  */
771 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
772 vhaddq_s32 (int32x4_t __a
, int32x4_t __b
)
774 return (int32x4_t
) __builtin_aarch64_shaddv4si (__a
, __b
);
777 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
778 vhaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
780 return (uint8x16_t
) __builtin_aarch64_uhaddv16qi ((int8x16_t
) __a
,
784 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
785 vhaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
787 return (uint16x8_t
) __builtin_aarch64_uhaddv8hi ((int16x8_t
) __a
,
791 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
792 vhaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
794 return (uint32x4_t
) __builtin_aarch64_uhaddv4si ((int32x4_t
) __a
,
/* vrhadd_s8: forwards __a and __b (both int8x8_t) unchanged to
   __builtin_aarch64_srhaddv8qi and returns the result cast to int8x8_t.  */
798 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
799 vrhadd_s8 (int8x8_t __a
, int8x8_t __b
)
801 return (int8x8_t
) __builtin_aarch64_srhaddv8qi (__a
, __b
);
/* vrhadd_s16: forwards __a and __b (both int16x4_t) unchanged to
   __builtin_aarch64_srhaddv4hi and returns the result cast to
   int16x4_t.  */
804 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
805 vrhadd_s16 (int16x4_t __a
, int16x4_t __b
)
807 return (int16x4_t
) __builtin_aarch64_srhaddv4hi (__a
, __b
);
/* vrhadd_s32: forwards __a and __b (both int32x2_t) unchanged to
   __builtin_aarch64_srhaddv2si and returns the result cast to
   int32x2_t.  */
810 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
811 vrhadd_s32 (int32x2_t __a
, int32x2_t __b
)
813 return (int32x2_t
) __builtin_aarch64_srhaddv2si (__a
, __b
);
816 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
817 vrhadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
819 return (uint8x8_t
) __builtin_aarch64_urhaddv8qi ((int8x8_t
) __a
,
823 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
824 vrhadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
826 return (uint16x4_t
) __builtin_aarch64_urhaddv4hi ((int16x4_t
) __a
,
830 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
831 vrhadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
833 return (uint32x2_t
) __builtin_aarch64_urhaddv2si ((int32x2_t
) __a
,
/* vrhaddq_s8: forwards __a and __b (both int8x16_t) unchanged to
   __builtin_aarch64_srhaddv16qi and returns the result cast to
   int8x16_t.  */
837 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
838 vrhaddq_s8 (int8x16_t __a
, int8x16_t __b
)
840 return (int8x16_t
) __builtin_aarch64_srhaddv16qi (__a
, __b
);
/* vrhaddq_s16: forwards __a and __b (both int16x8_t) unchanged to
   __builtin_aarch64_srhaddv8hi and returns the result cast to
   int16x8_t.  */
843 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
844 vrhaddq_s16 (int16x8_t __a
, int16x8_t __b
)
846 return (int16x8_t
) __builtin_aarch64_srhaddv8hi (__a
, __b
);
/* vrhaddq_s32: forwards __a and __b (both int32x4_t) unchanged to
   __builtin_aarch64_srhaddv4si and returns the result cast to
   int32x4_t.  */
849 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
850 vrhaddq_s32 (int32x4_t __a
, int32x4_t __b
)
852 return (int32x4_t
) __builtin_aarch64_srhaddv4si (__a
, __b
);
855 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
856 vrhaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
858 return (uint8x16_t
) __builtin_aarch64_urhaddv16qi ((int8x16_t
) __a
,
862 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
863 vrhaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
865 return (uint16x8_t
) __builtin_aarch64_urhaddv8hi ((int16x8_t
) __a
,
869 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
870 vrhaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
872 return (uint32x4_t
) __builtin_aarch64_urhaddv4si ((int32x4_t
) __a
,
/* vaddhn_s16: forwards __a and __b (both int16x8_t) unchanged to
   __builtin_aarch64_addhnv8hi and returns the result cast to int8x8_t.  */
876 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
877 vaddhn_s16 (int16x8_t __a
, int16x8_t __b
)
879 return (int8x8_t
) __builtin_aarch64_addhnv8hi (__a
, __b
);
/* vaddhn_s32: forwards __a and __b (both int32x4_t) unchanged to
   __builtin_aarch64_addhnv4si and returns the result cast to int16x4_t.  */
882 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
883 vaddhn_s32 (int32x4_t __a
, int32x4_t __b
)
885 return (int16x4_t
) __builtin_aarch64_addhnv4si (__a
, __b
);
/* vaddhn_s64: forwards __a and __b (both int64x2_t) unchanged to
   __builtin_aarch64_addhnv2di and returns the result cast to int32x2_t.  */
888 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
889 vaddhn_s64 (int64x2_t __a
, int64x2_t __b
)
891 return (int32x2_t
) __builtin_aarch64_addhnv2di (__a
, __b
);
894 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
895 vaddhn_u16 (uint16x8_t __a
, uint16x8_t __b
)
897 return (uint8x8_t
) __builtin_aarch64_addhnv8hi ((int16x8_t
) __a
,
901 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
902 vaddhn_u32 (uint32x4_t __a
, uint32x4_t __b
)
904 return (uint16x4_t
) __builtin_aarch64_addhnv4si ((int32x4_t
) __a
,
908 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
909 vaddhn_u64 (uint64x2_t __a
, uint64x2_t __b
)
911 return (uint32x2_t
) __builtin_aarch64_addhnv2di ((int64x2_t
) __a
,
/* vraddhn_s16: forwards __a and __b (both int16x8_t) unchanged to
   __builtin_aarch64_raddhnv8hi and returns the result cast to int8x8_t.  */
915 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
916 vraddhn_s16 (int16x8_t __a
, int16x8_t __b
)
918 return (int8x8_t
) __builtin_aarch64_raddhnv8hi (__a
, __b
);
/* vraddhn_s32: forwards __a and __b (both int32x4_t) unchanged to
   __builtin_aarch64_raddhnv4si and returns the result cast to
   int16x4_t.  */
921 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
922 vraddhn_s32 (int32x4_t __a
, int32x4_t __b
)
924 return (int16x4_t
) __builtin_aarch64_raddhnv4si (__a
, __b
);
/* vraddhn_s64: forwards __a and __b (both int64x2_t) unchanged to
   __builtin_aarch64_raddhnv2di and returns the result cast to
   int32x2_t.  */
927 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
928 vraddhn_s64 (int64x2_t __a
, int64x2_t __b
)
930 return (int32x2_t
) __builtin_aarch64_raddhnv2di (__a
, __b
);
933 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
934 vraddhn_u16 (uint16x8_t __a
, uint16x8_t __b
)
936 return (uint8x8_t
) __builtin_aarch64_raddhnv8hi ((int16x8_t
) __a
,
940 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
941 vraddhn_u32 (uint32x4_t __a
, uint32x4_t __b
)
943 return (uint16x4_t
) __builtin_aarch64_raddhnv4si ((int32x4_t
) __a
,
947 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
948 vraddhn_u64 (uint64x2_t __a
, uint64x2_t __b
)
950 return (uint32x2_t
) __builtin_aarch64_raddhnv2di ((int64x2_t
) __a
,
/* vaddhn_high_s16: forwards __a (int8x8_t), __b and __c (both int16x8_t)
   unchanged, in that order, to __builtin_aarch64_addhn2v8hi and returns
   the result cast to int8x16_t.  */
954 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
955 vaddhn_high_s16 (int8x8_t __a
, int16x8_t __b
, int16x8_t __c
)
957 return (int8x16_t
) __builtin_aarch64_addhn2v8hi (__a
, __b
, __c
);
/* vaddhn_high_s32: forwards __a (int16x4_t), __b and __c (both int32x4_t)
   unchanged, in that order, to __builtin_aarch64_addhn2v4si and returns
   the result cast to int16x8_t.  */
960 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
961 vaddhn_high_s32 (int16x4_t __a
, int32x4_t __b
, int32x4_t __c
)
963 return (int16x8_t
) __builtin_aarch64_addhn2v4si (__a
, __b
, __c
);
/* vaddhn_high_s64: forwards __a (int32x2_t), __b and __c (both int64x2_t)
   unchanged, in that order, to __builtin_aarch64_addhn2v2di and returns
   the result cast to int32x4_t.  */
966 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
967 vaddhn_high_s64 (int32x2_t __a
, int64x2_t __b
, int64x2_t __c
)
969 return (int32x4_t
) __builtin_aarch64_addhn2v2di (__a
, __b
, __c
);
972 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
973 vaddhn_high_u16 (uint8x8_t __a
, uint16x8_t __b
, uint16x8_t __c
)
975 return (uint8x16_t
) __builtin_aarch64_addhn2v8hi ((int8x8_t
) __a
,
980 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
981 vaddhn_high_u32 (uint16x4_t __a
, uint32x4_t __b
, uint32x4_t __c
)
983 return (uint16x8_t
) __builtin_aarch64_addhn2v4si ((int16x4_t
) __a
,
988 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
989 vaddhn_high_u64 (uint32x2_t __a
, uint64x2_t __b
, uint64x2_t __c
)
991 return (uint32x4_t
) __builtin_aarch64_addhn2v2di ((int32x2_t
) __a
,
/* vraddhn_high_s16: forwards __a (int8x8_t), __b and __c (both
   int16x8_t) unchanged, in that order, to __builtin_aarch64_raddhn2v8hi
   and returns the result cast to int8x16_t.  */
996 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
997 vraddhn_high_s16 (int8x8_t __a
, int16x8_t __b
, int16x8_t __c
)
999 return (int8x16_t
) __builtin_aarch64_raddhn2v8hi (__a
, __b
, __c
);
/* vraddhn_high_s32: forwards __a (int16x4_t), __b and __c (both
   int32x4_t) unchanged, in that order, to __builtin_aarch64_raddhn2v4si
   and returns the result cast to int16x8_t.  */
1002 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1003 vraddhn_high_s32 (int16x4_t __a
, int32x4_t __b
, int32x4_t __c
)
1005 return (int16x8_t
) __builtin_aarch64_raddhn2v4si (__a
, __b
, __c
);
/* vraddhn_high_s64: forwards __a (int32x2_t), __b and __c (both
   int64x2_t) unchanged, in that order, to __builtin_aarch64_raddhn2v2di
   and returns the result cast to int32x4_t.  */
1008 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1009 vraddhn_high_s64 (int32x2_t __a
, int64x2_t __b
, int64x2_t __c
)
1011 return (int32x4_t
) __builtin_aarch64_raddhn2v2di (__a
, __b
, __c
);
1014 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1015 vraddhn_high_u16 (uint8x8_t __a
, uint16x8_t __b
, uint16x8_t __c
)
1017 return (uint8x16_t
) __builtin_aarch64_raddhn2v8hi ((int8x8_t
) __a
,
1022 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1023 vraddhn_high_u32 (uint16x4_t __a
, uint32x4_t __b
, uint32x4_t __c
)
1025 return (uint16x8_t
) __builtin_aarch64_raddhn2v4si ((int16x4_t
) __a
,
1030 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1031 vraddhn_high_u64 (uint32x2_t __a
, uint64x2_t __b
, uint64x2_t __c
)
1033 return (uint32x4_t
) __builtin_aarch64_raddhn2v2di ((int32x2_t
) __a
,
1038 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1039 vdiv_f32 (float32x2_t __a
, float32x2_t __b
)
1044 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1045 vdivq_f32 (float32x4_t __a
, float32x4_t __b
)
1050 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1051 vdivq_f64 (float64x2_t __a
, float64x2_t __b
)
1056 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1057 vmul_s8 (int8x8_t __a
, int8x8_t __b
)
1062 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1063 vmul_s16 (int16x4_t __a
, int16x4_t __b
)
1068 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1069 vmul_s32 (int32x2_t __a
, int32x2_t __b
)
1074 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1075 vmul_f32 (float32x2_t __a
, float32x2_t __b
)
1080 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1081 vmul_u8 (uint8x8_t __a
, uint8x8_t __b
)
1086 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1087 vmul_u16 (uint16x4_t __a
, uint16x4_t __b
)
1092 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1093 vmul_u32 (uint32x2_t __a
, uint32x2_t __b
)
1098 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
1099 vmul_p8 (poly8x8_t __a
, poly8x8_t __b
)
1101 return (poly8x8_t
) __builtin_aarch64_pmulv8qi ((int8x8_t
) __a
,
1105 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1106 vmulq_s8 (int8x16_t __a
, int8x16_t __b
)
1111 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1112 vmulq_s16 (int16x8_t __a
, int16x8_t __b
)
1117 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1118 vmulq_s32 (int32x4_t __a
, int32x4_t __b
)
1123 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1124 vmulq_f32 (float32x4_t __a
, float32x4_t __b
)
1129 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1130 vmulq_f64 (float64x2_t __a
, float64x2_t __b
)
1135 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1136 vmulq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1141 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1142 vmulq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1147 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1148 vmulq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1153 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
1154 vmulq_p8 (poly8x16_t __a
, poly8x16_t __b
)
1156 return (poly8x16_t
) __builtin_aarch64_pmulv16qi ((int8x16_t
) __a
,
1160 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1161 vand_s8 (int8x8_t __a
, int8x8_t __b
)
1166 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1167 vand_s16 (int16x4_t __a
, int16x4_t __b
)
1172 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1173 vand_s32 (int32x2_t __a
, int32x2_t __b
)
1178 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1179 vand_u8 (uint8x8_t __a
, uint8x8_t __b
)
1184 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1185 vand_u16 (uint16x4_t __a
, uint16x4_t __b
)
1190 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1191 vand_u32 (uint32x2_t __a
, uint32x2_t __b
)
1196 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1197 vand_s64 (int64x1_t __a
, int64x1_t __b
)
1202 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1203 vand_u64 (uint64x1_t __a
, uint64x1_t __b
)
1208 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1209 vandq_s8 (int8x16_t __a
, int8x16_t __b
)
1214 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1215 vandq_s16 (int16x8_t __a
, int16x8_t __b
)
1220 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1221 vandq_s32 (int32x4_t __a
, int32x4_t __b
)
1226 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1227 vandq_s64 (int64x2_t __a
, int64x2_t __b
)
1232 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1233 vandq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1238 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1239 vandq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1244 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1245 vandq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1250 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1251 vandq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1256 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1257 vorr_s8 (int8x8_t __a
, int8x8_t __b
)
1262 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1263 vorr_s16 (int16x4_t __a
, int16x4_t __b
)
1268 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1269 vorr_s32 (int32x2_t __a
, int32x2_t __b
)
1274 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1275 vorr_u8 (uint8x8_t __a
, uint8x8_t __b
)
1280 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1281 vorr_u16 (uint16x4_t __a
, uint16x4_t __b
)
1286 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1287 vorr_u32 (uint32x2_t __a
, uint32x2_t __b
)
1292 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1293 vorr_s64 (int64x1_t __a
, int64x1_t __b
)
1298 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1299 vorr_u64 (uint64x1_t __a
, uint64x1_t __b
)
1304 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1305 vorrq_s8 (int8x16_t __a
, int8x16_t __b
)
1310 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1311 vorrq_s16 (int16x8_t __a
, int16x8_t __b
)
1316 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1317 vorrq_s32 (int32x4_t __a
, int32x4_t __b
)
1322 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1323 vorrq_s64 (int64x2_t __a
, int64x2_t __b
)
1328 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1329 vorrq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1334 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1335 vorrq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1340 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1341 vorrq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1346 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1347 vorrq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1352 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1353 veor_s8 (int8x8_t __a
, int8x8_t __b
)
1358 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1359 veor_s16 (int16x4_t __a
, int16x4_t __b
)
1364 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1365 veor_s32 (int32x2_t __a
, int32x2_t __b
)
1370 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1371 veor_u8 (uint8x8_t __a
, uint8x8_t __b
)
1376 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1377 veor_u16 (uint16x4_t __a
, uint16x4_t __b
)
1382 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1383 veor_u32 (uint32x2_t __a
, uint32x2_t __b
)
1388 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1389 veor_s64 (int64x1_t __a
, int64x1_t __b
)
1394 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1395 veor_u64 (uint64x1_t __a
, uint64x1_t __b
)
1400 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1401 veorq_s8 (int8x16_t __a
, int8x16_t __b
)
1406 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1407 veorq_s16 (int16x8_t __a
, int16x8_t __b
)
1412 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1413 veorq_s32 (int32x4_t __a
, int32x4_t __b
)
1418 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1419 veorq_s64 (int64x2_t __a
, int64x2_t __b
)
1424 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1425 veorq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1430 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1431 veorq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1436 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1437 veorq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1442 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1443 veorq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1448 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1449 vbic_s8 (int8x8_t __a
, int8x8_t __b
)
1454 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1455 vbic_s16 (int16x4_t __a
, int16x4_t __b
)
1460 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1461 vbic_s32 (int32x2_t __a
, int32x2_t __b
)
1466 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1467 vbic_u8 (uint8x8_t __a
, uint8x8_t __b
)
1472 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1473 vbic_u16 (uint16x4_t __a
, uint16x4_t __b
)
1478 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1479 vbic_u32 (uint32x2_t __a
, uint32x2_t __b
)
1484 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1485 vbic_s64 (int64x1_t __a
, int64x1_t __b
)
1490 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1491 vbic_u64 (uint64x1_t __a
, uint64x1_t __b
)
1496 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1497 vbicq_s8 (int8x16_t __a
, int8x16_t __b
)
1502 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1503 vbicq_s16 (int16x8_t __a
, int16x8_t __b
)
1508 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1509 vbicq_s32 (int32x4_t __a
, int32x4_t __b
)
1514 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1515 vbicq_s64 (int64x2_t __a
, int64x2_t __b
)
1520 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1521 vbicq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1526 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1527 vbicq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1532 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1533 vbicq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1538 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1539 vbicq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1544 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1545 vorn_s8 (int8x8_t __a
, int8x8_t __b
)
1550 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1551 vorn_s16 (int16x4_t __a
, int16x4_t __b
)
1556 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1557 vorn_s32 (int32x2_t __a
, int32x2_t __b
)
1562 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1563 vorn_u8 (uint8x8_t __a
, uint8x8_t __b
)
1568 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1569 vorn_u16 (uint16x4_t __a
, uint16x4_t __b
)
1574 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1575 vorn_u32 (uint32x2_t __a
, uint32x2_t __b
)
1580 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1581 vorn_s64 (int64x1_t __a
, int64x1_t __b
)
1586 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1587 vorn_u64 (uint64x1_t __a
, uint64x1_t __b
)
1592 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1593 vornq_s8 (int8x16_t __a
, int8x16_t __b
)
1598 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1599 vornq_s16 (int16x8_t __a
, int16x8_t __b
)
1604 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1605 vornq_s32 (int32x4_t __a
, int32x4_t __b
)
1610 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1611 vornq_s64 (int64x2_t __a
, int64x2_t __b
)
1616 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1617 vornq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1622 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1623 vornq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1628 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1629 vornq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1634 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1635 vornq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1640 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1641 vsub_s8 (int8x8_t __a
, int8x8_t __b
)
1646 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1647 vsub_s16 (int16x4_t __a
, int16x4_t __b
)
1652 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1653 vsub_s32 (int32x2_t __a
, int32x2_t __b
)
1658 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
1659 vsub_f32 (float32x2_t __a
, float32x2_t __b
)
1664 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1665 vsub_u8 (uint8x8_t __a
, uint8x8_t __b
)
1670 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1671 vsub_u16 (uint16x4_t __a
, uint16x4_t __b
)
1676 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1677 vsub_u32 (uint32x2_t __a
, uint32x2_t __b
)
1682 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1683 vsub_s64 (int64x1_t __a
, int64x1_t __b
)
1688 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1689 vsub_u64 (uint64x1_t __a
, uint64x1_t __b
)
1694 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1695 vsubq_s8 (int8x16_t __a
, int8x16_t __b
)
1700 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1701 vsubq_s16 (int16x8_t __a
, int16x8_t __b
)
1706 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1707 vsubq_s32 (int32x4_t __a
, int32x4_t __b
)
1712 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1713 vsubq_s64 (int64x2_t __a
, int64x2_t __b
)
1718 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
1719 vsubq_f32 (float32x4_t __a
, float32x4_t __b
)
1724 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
1725 vsubq_f64 (float64x2_t __a
, float64x2_t __b
)
1730 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1731 vsubq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1736 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1737 vsubq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1742 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1743 vsubq_u32 (uint32x4_t __a
, uint32x4_t __b
)
1748 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1749 vsubq_u64 (uint64x2_t __a
, uint64x2_t __b
)
1754 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1755 vsubl_s8 (int8x8_t __a
, int8x8_t __b
)
1757 return (int16x8_t
) __builtin_aarch64_ssublv8qi (__a
, __b
);
1760 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1761 vsubl_s16 (int16x4_t __a
, int16x4_t __b
)
1763 return (int32x4_t
) __builtin_aarch64_ssublv4hi (__a
, __b
);
1766 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1767 vsubl_s32 (int32x2_t __a
, int32x2_t __b
)
1769 return (int64x2_t
) __builtin_aarch64_ssublv2si (__a
, __b
);
1772 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1773 vsubl_u8 (uint8x8_t __a
, uint8x8_t __b
)
1775 return (uint16x8_t
) __builtin_aarch64_usublv8qi ((int8x8_t
) __a
,
1779 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1780 vsubl_u16 (uint16x4_t __a
, uint16x4_t __b
)
1782 return (uint32x4_t
) __builtin_aarch64_usublv4hi ((int16x4_t
) __a
,
1786 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1787 vsubl_u32 (uint32x2_t __a
, uint32x2_t __b
)
1789 return (uint64x2_t
) __builtin_aarch64_usublv2si ((int32x2_t
) __a
,
1793 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1794 vsubl_high_s8 (int8x16_t __a
, int8x16_t __b
)
1796 return (int16x8_t
) __builtin_aarch64_ssubl2v16qi (__a
, __b
);
1799 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1800 vsubl_high_s16 (int16x8_t __a
, int16x8_t __b
)
1802 return (int32x4_t
) __builtin_aarch64_ssubl2v8hi (__a
, __b
);
1805 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1806 vsubl_high_s32 (int32x4_t __a
, int32x4_t __b
)
1808 return (int64x2_t
) __builtin_aarch64_ssubl2v4si (__a
, __b
);
1811 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1812 vsubl_high_u8 (uint8x16_t __a
, uint8x16_t __b
)
1814 return (uint16x8_t
) __builtin_aarch64_usubl2v16qi ((int8x16_t
) __a
,
1818 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1819 vsubl_high_u16 (uint16x8_t __a
, uint16x8_t __b
)
1821 return (uint32x4_t
) __builtin_aarch64_usubl2v8hi ((int16x8_t
) __a
,
1825 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1826 vsubl_high_u32 (uint32x4_t __a
, uint32x4_t __b
)
1828 return (uint64x2_t
) __builtin_aarch64_usubl2v4si ((int32x4_t
) __a
,
1832 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1833 vsubw_s8 (int16x8_t __a
, int8x8_t __b
)
1835 return (int16x8_t
) __builtin_aarch64_ssubwv8qi (__a
, __b
);
1838 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1839 vsubw_s16 (int32x4_t __a
, int16x4_t __b
)
1841 return (int32x4_t
) __builtin_aarch64_ssubwv4hi (__a
, __b
);
1844 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1845 vsubw_s32 (int64x2_t __a
, int32x2_t __b
)
1847 return (int64x2_t
) __builtin_aarch64_ssubwv2si (__a
, __b
);
1850 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1851 vsubw_u8 (uint16x8_t __a
, uint8x8_t __b
)
1853 return (uint16x8_t
) __builtin_aarch64_usubwv8qi ((int16x8_t
) __a
,
1857 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1858 vsubw_u16 (uint32x4_t __a
, uint16x4_t __b
)
1860 return (uint32x4_t
) __builtin_aarch64_usubwv4hi ((int32x4_t
) __a
,
1864 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1865 vsubw_u32 (uint64x2_t __a
, uint32x2_t __b
)
1867 return (uint64x2_t
) __builtin_aarch64_usubwv2si ((int64x2_t
) __a
,
1871 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1872 vsubw_high_s8 (int16x8_t __a
, int8x16_t __b
)
1874 return (int16x8_t
) __builtin_aarch64_ssubw2v16qi (__a
, __b
);
1877 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1878 vsubw_high_s16 (int32x4_t __a
, int16x8_t __b
)
1880 return (int32x4_t
) __builtin_aarch64_ssubw2v8hi (__a
, __b
);
1883 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1884 vsubw_high_s32 (int64x2_t __a
, int32x4_t __b
)
1886 return (int64x2_t
) __builtin_aarch64_ssubw2v4si (__a
, __b
);
1889 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1890 vsubw_high_u8 (uint16x8_t __a
, uint8x16_t __b
)
1892 return (uint16x8_t
) __builtin_aarch64_usubw2v16qi ((int16x8_t
) __a
,
1896 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
1897 vsubw_high_u16 (uint32x4_t __a
, uint16x8_t __b
)
1899 return (uint32x4_t
) __builtin_aarch64_usubw2v8hi ((int32x4_t
) __a
,
1903 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
1904 vsubw_high_u32 (uint64x2_t __a
, uint32x4_t __b
)
1906 return (uint64x2_t
) __builtin_aarch64_usubw2v4si ((int64x2_t
) __a
,
1910 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
1911 vqadd_s8 (int8x8_t __a
, int8x8_t __b
)
1913 return (int8x8_t
) __builtin_aarch64_sqaddv8qi (__a
, __b
);
1916 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
1917 vqadd_s16 (int16x4_t __a
, int16x4_t __b
)
1919 return (int16x4_t
) __builtin_aarch64_sqaddv4hi (__a
, __b
);
1922 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
1923 vqadd_s32 (int32x2_t __a
, int32x2_t __b
)
1925 return (int32x2_t
) __builtin_aarch64_sqaddv2si (__a
, __b
);
1928 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
1929 vqadd_s64 (int64x1_t __a
, int64x1_t __b
)
1931 return (int64x1_t
) __builtin_aarch64_sqadddi (__a
, __b
);
1934 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
1935 vqadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
1937 return (uint8x8_t
) __builtin_aarch64_uqaddv8qi ((int8x8_t
) __a
,
1941 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
1942 vqadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
1944 return (uint16x4_t
) __builtin_aarch64_uqaddv4hi ((int16x4_t
) __a
,
1948 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
1949 vqadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
1951 return (uint32x2_t
) __builtin_aarch64_uqaddv2si ((int32x2_t
) __a
,
1955 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
1956 vqadd_u64 (uint64x1_t __a
, uint64x1_t __b
)
1958 return (uint64x1_t
) __builtin_aarch64_uqadddi ((int64x1_t
) __a
,
1962 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
1963 vqaddq_s8 (int8x16_t __a
, int8x16_t __b
)
1965 return (int8x16_t
) __builtin_aarch64_sqaddv16qi (__a
, __b
);
1968 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
1969 vqaddq_s16 (int16x8_t __a
, int16x8_t __b
)
1971 return (int16x8_t
) __builtin_aarch64_sqaddv8hi (__a
, __b
);
1974 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
1975 vqaddq_s32 (int32x4_t __a
, int32x4_t __b
)
1977 return (int32x4_t
) __builtin_aarch64_sqaddv4si (__a
, __b
);
1980 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
1981 vqaddq_s64 (int64x2_t __a
, int64x2_t __b
)
1983 return (int64x2_t
) __builtin_aarch64_sqaddv2di (__a
, __b
);
1986 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
1987 vqaddq_u8 (uint8x16_t __a
, uint8x16_t __b
)
1989 return (uint8x16_t
) __builtin_aarch64_uqaddv16qi ((int8x16_t
) __a
,
1993 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
1994 vqaddq_u16 (uint16x8_t __a
, uint16x8_t __b
)
1996 return (uint16x8_t
) __builtin_aarch64_uqaddv8hi ((int16x8_t
) __a
,
2000 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
2001 vqaddq_u32 (uint32x4_t __a
, uint32x4_t __b
)
2003 return (uint32x4_t
) __builtin_aarch64_uqaddv4si ((int32x4_t
) __a
,
2007 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
2008 vqaddq_u64 (uint64x2_t __a
, uint64x2_t __b
)
2010 return (uint64x2_t
) __builtin_aarch64_uqaddv2di ((int64x2_t
) __a
,
2014 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2015 vqsub_s8 (int8x8_t __a
, int8x8_t __b
)
2017 return (int8x8_t
) __builtin_aarch64_sqsubv8qi (__a
, __b
);
2020 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2021 vqsub_s16 (int16x4_t __a
, int16x4_t __b
)
2023 return (int16x4_t
) __builtin_aarch64_sqsubv4hi (__a
, __b
);
2026 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2027 vqsub_s32 (int32x2_t __a
, int32x2_t __b
)
2029 return (int32x2_t
) __builtin_aarch64_sqsubv2si (__a
, __b
);
2032 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2033 vqsub_s64 (int64x1_t __a
, int64x1_t __b
)
2035 return (int64x1_t
) __builtin_aarch64_sqsubdi (__a
, __b
);
2038 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
2039 vqsub_u8 (uint8x8_t __a
, uint8x8_t __b
)
2041 return (uint8x8_t
) __builtin_aarch64_uqsubv8qi ((int8x8_t
) __a
,
2045 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
2046 vqsub_u16 (uint16x4_t __a
, uint16x4_t __b
)
2048 return (uint16x4_t
) __builtin_aarch64_uqsubv4hi ((int16x4_t
) __a
,
2052 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
2053 vqsub_u32 (uint32x2_t __a
, uint32x2_t __b
)
2055 return (uint32x2_t
) __builtin_aarch64_uqsubv2si ((int32x2_t
) __a
,
2059 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2060 vqsub_u64 (uint64x1_t __a
, uint64x1_t __b
)
2062 return (uint64x1_t
) __builtin_aarch64_uqsubdi ((int64x1_t
) __a
,
2066 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2067 vqsubq_s8 (int8x16_t __a
, int8x16_t __b
)
2069 return (int8x16_t
) __builtin_aarch64_sqsubv16qi (__a
, __b
);
2072 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2073 vqsubq_s16 (int16x8_t __a
, int16x8_t __b
)
2075 return (int16x8_t
) __builtin_aarch64_sqsubv8hi (__a
, __b
);
2078 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2079 vqsubq_s32 (int32x4_t __a
, int32x4_t __b
)
2081 return (int32x4_t
) __builtin_aarch64_sqsubv4si (__a
, __b
);
2084 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2085 vqsubq_s64 (int64x2_t __a
, int64x2_t __b
)
2087 return (int64x2_t
) __builtin_aarch64_sqsubv2di (__a
, __b
);
2090 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
2091 vqsubq_u8 (uint8x16_t __a
, uint8x16_t __b
)
2093 return (uint8x16_t
) __builtin_aarch64_uqsubv16qi ((int8x16_t
) __a
,
2097 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
2098 vqsubq_u16 (uint16x8_t __a
, uint16x8_t __b
)
2100 return (uint16x8_t
) __builtin_aarch64_uqsubv8hi ((int16x8_t
) __a
,
2104 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
2105 vqsubq_u32 (uint32x4_t __a
, uint32x4_t __b
)
2107 return (uint32x4_t
) __builtin_aarch64_uqsubv4si ((int32x4_t
) __a
,
2111 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
2112 vqsubq_u64 (uint64x2_t __a
, uint64x2_t __b
)
2114 return (uint64x2_t
) __builtin_aarch64_uqsubv2di ((int64x2_t
) __a
,
2118 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2119 vqneg_s8 (int8x8_t __a
)
2121 return (int8x8_t
) __builtin_aarch64_sqnegv8qi (__a
);
2124 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2125 vqneg_s16 (int16x4_t __a
)
2127 return (int16x4_t
) __builtin_aarch64_sqnegv4hi (__a
);
2130 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2131 vqneg_s32 (int32x2_t __a
)
2133 return (int32x2_t
) __builtin_aarch64_sqnegv2si (__a
);
2136 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2137 vqnegq_s8 (int8x16_t __a
)
2139 return (int8x16_t
) __builtin_aarch64_sqnegv16qi (__a
);
2142 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2143 vqnegq_s16 (int16x8_t __a
)
2145 return (int16x8_t
) __builtin_aarch64_sqnegv8hi (__a
);
2148 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2149 vqnegq_s32 (int32x4_t __a
)
2151 return (int32x4_t
) __builtin_aarch64_sqnegv4si (__a
);
2154 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2155 vqabs_s8 (int8x8_t __a
)
2157 return (int8x8_t
) __builtin_aarch64_sqabsv8qi (__a
);
2160 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2161 vqabs_s16 (int16x4_t __a
)
2163 return (int16x4_t
) __builtin_aarch64_sqabsv4hi (__a
);
2166 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2167 vqabs_s32 (int32x2_t __a
)
2169 return (int32x2_t
) __builtin_aarch64_sqabsv2si (__a
);
2172 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
2173 vqabsq_s8 (int8x16_t __a
)
2175 return (int8x16_t
) __builtin_aarch64_sqabsv16qi (__a
);
2178 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2179 vqabsq_s16 (int16x8_t __a
)
2181 return (int16x8_t
) __builtin_aarch64_sqabsv8hi (__a
);
2184 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2185 vqabsq_s32 (int32x4_t __a
)
2187 return (int32x4_t
) __builtin_aarch64_sqabsv4si (__a
);
2190 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2191 vqdmulh_s16 (int16x4_t __a
, int16x4_t __b
)
2193 return (int16x4_t
) __builtin_aarch64_sqdmulhv4hi (__a
, __b
);
2196 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2197 vqdmulh_s32 (int32x2_t __a
, int32x2_t __b
)
2199 return (int32x2_t
) __builtin_aarch64_sqdmulhv2si (__a
, __b
);
2202 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2203 vqdmulhq_s16 (int16x8_t __a
, int16x8_t __b
)
2205 return (int16x8_t
) __builtin_aarch64_sqdmulhv8hi (__a
, __b
);
2208 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2209 vqdmulhq_s32 (int32x4_t __a
, int32x4_t __b
)
2211 return (int32x4_t
) __builtin_aarch64_sqdmulhv4si (__a
, __b
);
2214 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2215 vqrdmulh_s16 (int16x4_t __a
, int16x4_t __b
)
2217 return (int16x4_t
) __builtin_aarch64_sqrdmulhv4hi (__a
, __b
);
2220 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2221 vqrdmulh_s32 (int32x2_t __a
, int32x2_t __b
)
2223 return (int32x2_t
) __builtin_aarch64_sqrdmulhv2si (__a
, __b
);
2226 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
2227 vqrdmulhq_s16 (int16x8_t __a
, int16x8_t __b
)
2229 return (int16x8_t
) __builtin_aarch64_sqrdmulhv8hi (__a
, __b
);
2232 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
2233 vqrdmulhq_s32 (int32x4_t __a
, int32x4_t __b
)
2235 return (int32x4_t
) __builtin_aarch64_sqrdmulhv4si (__a
, __b
);
2238 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
2239 vcreate_s8 (uint64_t __a
)
2241 return (int8x8_t
) __a
;
2244 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
2245 vcreate_s16 (uint64_t __a
)
2247 return (int16x4_t
) __a
;
2250 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
2251 vcreate_s32 (uint64_t __a
)
2253 return (int32x2_t
) __a
;
2256 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2257 vcreate_s64 (uint64_t __a
)
2259 return (int64x1_t
) __a
;
2262 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2263 vcreate_f32 (uint64_t __a
)
2265 return (float32x2_t
) __a
;
2268 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
2269 vcreate_u8 (uint64_t __a
)
2271 return (uint8x8_t
) __a
;
2274 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
2275 vcreate_u16 (uint64_t __a
)
2277 return (uint16x4_t
) __a
;
2280 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
2281 vcreate_u32 (uint64_t __a
)
2283 return (uint32x2_t
) __a
;
2286 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2287 vcreate_u64 (uint64_t __a
)
2289 return (uint64x1_t
) __a
;
2292 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
2293 vcreate_f64 (uint64_t __a
)
2295 return (float64x1_t
) __builtin_aarch64_createdf (__a
);
2298 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2299 vcreate_p8 (uint64_t __a
)
2301 return (poly8x8_t
) __a
;
2304 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2305 vcreate_p16 (uint64_t __a
)
2307 return (poly16x4_t
) __a
;
2310 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
2311 vget_lane_s8 (int8x8_t __a
, const int __b
)
2313 return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a
, __b
);
2316 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
2317 vget_lane_s16 (int16x4_t __a
, const int __b
)
2319 return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a
, __b
);
2322 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
2323 vget_lane_s32 (int32x2_t __a
, const int __b
)
2325 return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a
, __b
);
2328 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
2329 vget_lane_f32 (float32x2_t __a
, const int __b
)
2331 return (float32_t
) __builtin_aarch64_get_lanev2sf (__a
, __b
);
2334 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
2335 vget_lane_u8 (uint8x8_t __a
, const int __b
)
2337 return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t
) __a
,
2341 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
2342 vget_lane_u16 (uint16x4_t __a
, const int __b
)
2344 return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t
) __a
,
2348 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
2349 vget_lane_u32 (uint32x2_t __a
, const int __b
)
2351 return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t
) __a
,
2355 __extension__
static __inline poly8_t
__attribute__ ((__always_inline__
))
2356 vget_lane_p8 (poly8x8_t __a
, const int __b
)
2358 return (poly8_t
) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t
) __a
,
2362 __extension__
static __inline poly16_t
__attribute__ ((__always_inline__
))
2363 vget_lane_p16 (poly16x4_t __a
, const int __b
)
2365 return (poly16_t
) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t
) __a
,
2369 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
2370 vget_lane_s64 (int64x1_t __a
, const int __b
)
2372 return (int64_t) __builtin_aarch64_get_lanedi (__a
, __b
);
2375 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
2376 vget_lane_u64 (uint64x1_t __a
, const int __b
)
2378 return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t
) __a
, __b
);
2381 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
2382 vgetq_lane_s8 (int8x16_t __a
, const int __b
)
2384 return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a
, __b
);
2387 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
2388 vgetq_lane_s16 (int16x8_t __a
, const int __b
)
2390 return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a
, __b
);
2393 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
2394 vgetq_lane_s32 (int32x4_t __a
, const int __b
)
2396 return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a
, __b
);
2399 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
2400 vgetq_lane_f32 (float32x4_t __a
, const int __b
)
2402 return (float32_t
) __builtin_aarch64_get_lanev4sf (__a
, __b
);
2405 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
2406 vgetq_lane_f64 (float64x2_t __a
, const int __b
)
2408 return (float64_t
) __builtin_aarch64_get_lanev2df (__a
, __b
);
2411 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
2412 vgetq_lane_u8 (uint8x16_t __a
, const int __b
)
2414 return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t
) __a
,
2418 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
2419 vgetq_lane_u16 (uint16x8_t __a
, const int __b
)
2421 return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t
) __a
,
2425 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
2426 vgetq_lane_u32 (uint32x4_t __a
, const int __b
)
2428 return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t
) __a
,
2432 __extension__
static __inline poly8_t
__attribute__ ((__always_inline__
))
2433 vgetq_lane_p8 (poly8x16_t __a
, const int __b
)
2435 return (poly8_t
) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t
) __a
,
2439 __extension__
static __inline poly16_t
__attribute__ ((__always_inline__
))
2440 vgetq_lane_p16 (poly16x8_t __a
, const int __b
)
2442 return (poly16_t
) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t
) __a
,
2446 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
2447 vgetq_lane_s64 (int64x2_t __a
, const int __b
)
2449 return __builtin_aarch64_get_lane_unsignedv2di (__a
, __b
);
2452 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
2453 vgetq_lane_u64 (uint64x2_t __a
, const int __b
)
2455 return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t
) __a
,
2459 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2460 vreinterpret_p8_s8 (int8x8_t __a
)
2462 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi (__a
);
2465 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2466 vreinterpret_p8_s16 (int16x4_t __a
)
2468 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
2471 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2472 vreinterpret_p8_s32 (int32x2_t __a
)
2474 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
2477 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2478 vreinterpret_p8_s64 (int64x1_t __a
)
2480 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
2483 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2484 vreinterpret_p8_f32 (float32x2_t __a
)
2486 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
2489 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2490 vreinterpret_p8_u8 (uint8x8_t __a
)
2492 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
2495 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2496 vreinterpret_p8_u16 (uint16x4_t __a
)
2498 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
2501 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2502 vreinterpret_p8_u32 (uint32x2_t __a
)
2504 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
2507 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2508 vreinterpret_p8_u64 (uint64x1_t __a
)
2510 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
2513 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
2514 vreinterpret_p8_p16 (poly16x4_t __a
)
2516 return (poly8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
2519 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2520 vreinterpretq_p8_s8 (int8x16_t __a
)
2522 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi (__a
);
2525 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2526 vreinterpretq_p8_s16 (int16x8_t __a
)
2528 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
2531 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2532 vreinterpretq_p8_s32 (int32x4_t __a
)
2534 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
2537 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2538 vreinterpretq_p8_s64 (int64x2_t __a
)
2540 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
2543 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2544 vreinterpretq_p8_f32 (float32x4_t __a
)
2546 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
2549 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2550 vreinterpretq_p8_u8 (uint8x16_t __a
)
2552 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
2556 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2557 vreinterpretq_p8_u16 (uint16x8_t __a
)
2559 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
2563 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2564 vreinterpretq_p8_u32 (uint32x4_t __a
)
2566 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
)
2570 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2571 vreinterpretq_p8_u64 (uint64x2_t __a
)
2573 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
)
2577 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
2578 vreinterpretq_p8_p16 (poly16x8_t __a
)
2580 return (poly8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
2584 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2585 vreinterpret_p16_s8 (int8x8_t __a
)
2587 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
2590 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2591 vreinterpret_p16_s16 (int16x4_t __a
)
2593 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi (__a
);
2596 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2597 vreinterpret_p16_s32 (int32x2_t __a
)
2599 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
2602 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2603 vreinterpret_p16_s64 (int64x1_t __a
)
2605 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
2608 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2609 vreinterpret_p16_f32 (float32x2_t __a
)
2611 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
2614 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2615 vreinterpret_p16_u8 (uint8x8_t __a
)
2617 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
2620 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2621 vreinterpret_p16_u16 (uint16x4_t __a
)
2623 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
2626 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2627 vreinterpret_p16_u32 (uint32x2_t __a
)
2629 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
2632 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2633 vreinterpret_p16_u64 (uint64x1_t __a
)
2635 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
2638 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
2639 vreinterpret_p16_p8 (poly8x8_t __a
)
2641 return (poly16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
2644 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2645 vreinterpretq_p16_s8 (int8x16_t __a
)
2647 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
2650 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2651 vreinterpretq_p16_s16 (int16x8_t __a
)
2653 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi (__a
);
2656 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2657 vreinterpretq_p16_s32 (int32x4_t __a
)
2659 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
2662 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2663 vreinterpretq_p16_s64 (int64x2_t __a
)
2665 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
2668 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2669 vreinterpretq_p16_f32 (float32x4_t __a
)
2671 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
2674 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2675 vreinterpretq_p16_u8 (uint8x16_t __a
)
2677 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
2681 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2682 vreinterpretq_p16_u16 (uint16x8_t __a
)
2684 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
2687 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2688 vreinterpretq_p16_u32 (uint32x4_t __a
)
2690 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
2693 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2694 vreinterpretq_p16_u64 (uint64x2_t __a
)
2696 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
2699 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
2700 vreinterpretq_p16_p8 (poly8x16_t __a
)
2702 return (poly16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
2706 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2707 vreinterpret_f32_s8 (int8x8_t __a
)
2709 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi (__a
);
2712 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2713 vreinterpret_f32_s16 (int16x4_t __a
)
2715 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi (__a
);
2718 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2719 vreinterpret_f32_s32 (int32x2_t __a
)
2721 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv2si (__a
);
2724 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2725 vreinterpret_f32_s64 (int64x1_t __a
)
2727 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfdi (__a
);
2730 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2731 vreinterpret_f32_u8 (uint8x8_t __a
)
2733 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t
) __a
);
2736 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2737 vreinterpret_f32_u16 (uint16x4_t __a
)
2739 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t
)
2743 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2744 vreinterpret_f32_u32 (uint32x2_t __a
)
2746 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t
)
2750 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2751 vreinterpret_f32_u64 (uint64x1_t __a
)
2753 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t
) __a
);
2756 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2757 vreinterpret_f32_p8 (poly8x8_t __a
)
2759 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t
) __a
);
2762 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
2763 vreinterpret_f32_p16 (poly16x4_t __a
)
2765 return (float32x2_t
) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t
)
2769 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2770 vreinterpretq_f32_s8 (int8x16_t __a
)
2772 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi (__a
);
2775 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2776 vreinterpretq_f32_s16 (int16x8_t __a
)
2778 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi (__a
);
2781 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2782 vreinterpretq_f32_s32 (int32x4_t __a
)
2784 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv4si (__a
);
2787 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2788 vreinterpretq_f32_s64 (int64x2_t __a
)
2790 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv2di (__a
);
2793 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2794 vreinterpretq_f32_u8 (uint8x16_t __a
)
2796 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t
)
2800 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2801 vreinterpretq_f32_u16 (uint16x8_t __a
)
2803 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t
)
2807 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2808 vreinterpretq_f32_u32 (uint32x4_t __a
)
2810 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t
)
2814 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2815 vreinterpretq_f32_u64 (uint64x2_t __a
)
2817 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t
)
2821 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2822 vreinterpretq_f32_p8 (poly8x16_t __a
)
2824 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t
)
2828 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
2829 vreinterpretq_f32_p16 (poly16x8_t __a
)
2831 return (float32x4_t
) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t
)
2835 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2836 vreinterpret_s64_s8 (int8x8_t __a
)
2838 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi (__a
);
2841 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2842 vreinterpret_s64_s16 (int16x4_t __a
)
2844 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi (__a
);
2847 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2848 vreinterpret_s64_s32 (int32x2_t __a
)
2850 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2si (__a
);
2853 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2854 vreinterpret_s64_f32 (float32x2_t __a
)
2856 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2sf (__a
);
2859 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2860 vreinterpret_s64_u8 (uint8x8_t __a
)
2862 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
2865 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2866 vreinterpret_s64_u16 (uint16x4_t __a
)
2868 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
2871 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2872 vreinterpret_s64_u32 (uint32x2_t __a
)
2874 return (int64x1_t
) __builtin_aarch64_reinterpretdiv2si ((int32x2_t
) __a
);
2877 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2878 vreinterpret_s64_u64 (uint64x1_t __a
)
2880 return (int64x1_t
) __builtin_aarch64_reinterpretdidi ((int64x1_t
) __a
);
2883 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2884 vreinterpret_s64_p8 (poly8x8_t __a
)
2886 return (int64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
2889 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
2890 vreinterpret_s64_p16 (poly16x4_t __a
)
2892 return (int64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
2895 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2896 vreinterpretq_s64_s8 (int8x16_t __a
)
2898 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi (__a
);
2901 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2902 vreinterpretq_s64_s16 (int16x8_t __a
)
2904 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi (__a
);
2907 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2908 vreinterpretq_s64_s32 (int32x4_t __a
)
2910 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4si (__a
);
2913 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2914 vreinterpretq_s64_f32 (float32x4_t __a
)
2916 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4sf (__a
);
2919 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2920 vreinterpretq_s64_u8 (uint8x16_t __a
)
2922 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
) __a
);
2925 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2926 vreinterpretq_s64_u16 (uint16x8_t __a
)
2928 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
2931 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2932 vreinterpretq_s64_u32 (uint32x4_t __a
)
2934 return (int64x2_t
) __builtin_aarch64_reinterpretv2div4si ((int32x4_t
) __a
);
2937 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2938 vreinterpretq_s64_u64 (uint64x2_t __a
)
2940 return (int64x2_t
) __builtin_aarch64_reinterpretv2div2di ((int64x2_t
) __a
);
2943 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2944 vreinterpretq_s64_p8 (poly8x16_t __a
)
2946 return (int64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
) __a
);
2949 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
2950 vreinterpretq_s64_p16 (poly16x8_t __a
)
2952 return (int64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
2955 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2956 vreinterpret_u64_s8 (int8x8_t __a
)
2958 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi (__a
);
2961 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2962 vreinterpret_u64_s16 (int16x4_t __a
)
2964 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi (__a
);
2967 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2968 vreinterpret_u64_s32 (int32x2_t __a
)
2970 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2si (__a
);
2973 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2974 vreinterpret_u64_s64 (int64x1_t __a
)
2976 return (uint64x1_t
) __builtin_aarch64_reinterpretdidi (__a
);
2979 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2980 vreinterpret_u64_f32 (float32x2_t __a
)
2982 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2sf (__a
);
2985 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2986 vreinterpret_u64_u8 (uint8x8_t __a
)
2988 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
2991 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2992 vreinterpret_u64_u16 (uint16x4_t __a
)
2994 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
2997 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
2998 vreinterpret_u64_u32 (uint32x2_t __a
)
3000 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv2si ((int32x2_t
) __a
);
3003 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3004 vreinterpret_u64_p8 (poly8x8_t __a
)
3006 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t
) __a
);
3009 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
3010 vreinterpret_u64_p16 (poly16x4_t __a
)
3012 return (uint64x1_t
) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t
) __a
);
3015 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3016 vreinterpretq_u64_s8 (int8x16_t __a
)
3018 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi (__a
);
3021 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3022 vreinterpretq_u64_s16 (int16x8_t __a
)
3024 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi (__a
);
3027 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3028 vreinterpretq_u64_s32 (int32x4_t __a
)
3030 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4si (__a
);
3033 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3034 vreinterpretq_u64_s64 (int64x2_t __a
)
3036 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div2di (__a
);
3039 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3040 vreinterpretq_u64_f32 (float32x4_t __a
)
3042 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4sf (__a
);
3045 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3046 vreinterpretq_u64_u8 (uint8x16_t __a
)
3048 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
)
3052 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3053 vreinterpretq_u64_u16 (uint16x8_t __a
)
3055 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
3058 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3059 vreinterpretq_u64_u32 (uint32x4_t __a
)
3061 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div4si ((int32x4_t
) __a
);
3064 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3065 vreinterpretq_u64_p8 (poly8x16_t __a
)
3067 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t
)
3071 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3072 vreinterpretq_u64_p16 (poly16x8_t __a
)
3074 return (uint64x2_t
) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t
) __a
);
3077 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3078 vreinterpret_s8_s16 (int16x4_t __a
)
3080 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
3083 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3084 vreinterpret_s8_s32 (int32x2_t __a
)
3086 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
3089 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3090 vreinterpret_s8_s64 (int64x1_t __a
)
3092 return (int8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
3095 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3096 vreinterpret_s8_f32 (float32x2_t __a
)
3098 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
3101 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3102 vreinterpret_s8_u8 (uint8x8_t __a
)
3104 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3107 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3108 vreinterpret_s8_u16 (uint16x4_t __a
)
3110 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3113 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3114 vreinterpret_s8_u32 (uint32x2_t __a
)
3116 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
3119 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3120 vreinterpret_s8_u64 (uint64x1_t __a
)
3122 return (int8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
3125 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3126 vreinterpret_s8_p8 (poly8x8_t __a
)
3128 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3131 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3132 vreinterpret_s8_p16 (poly16x4_t __a
)
3134 return (int8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3137 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3138 vreinterpretq_s8_s16 (int16x8_t __a
)
3140 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
3143 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3144 vreinterpretq_s8_s32 (int32x4_t __a
)
3146 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
3149 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3150 vreinterpretq_s8_s64 (int64x2_t __a
)
3152 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
3155 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3156 vreinterpretq_s8_f32 (float32x4_t __a
)
3158 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
3161 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3162 vreinterpretq_s8_u8 (uint8x16_t __a
)
3164 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3168 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3169 vreinterpretq_s8_u16 (uint16x8_t __a
)
3171 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
) __a
);
3174 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3175 vreinterpretq_s8_u32 (uint32x4_t __a
)
3177 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
) __a
);
3180 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3181 vreinterpretq_s8_u64 (uint64x2_t __a
)
3183 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
) __a
);
3186 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3187 vreinterpretq_s8_p8 (poly8x16_t __a
)
3189 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3193 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3194 vreinterpretq_s8_p16 (poly16x8_t __a
)
3196 return (int8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
) __a
);
3199 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3200 vreinterpret_s16_s8 (int8x8_t __a
)
3202 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
3205 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3206 vreinterpret_s16_s32 (int32x2_t __a
)
3208 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
3211 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3212 vreinterpret_s16_s64 (int64x1_t __a
)
3214 return (int16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
3217 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3218 vreinterpret_s16_f32 (float32x2_t __a
)
3220 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
3223 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3224 vreinterpret_s16_u8 (uint8x8_t __a
)
3226 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3229 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3230 vreinterpret_s16_u16 (uint16x4_t __a
)
3232 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3235 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3236 vreinterpret_s16_u32 (uint32x2_t __a
)
3238 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
3241 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3242 vreinterpret_s16_u64 (uint64x1_t __a
)
3244 return (int16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
3247 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3248 vreinterpret_s16_p8 (poly8x8_t __a
)
3250 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3253 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3254 vreinterpret_s16_p16 (poly16x4_t __a
)
3256 return (int16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3259 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3260 vreinterpretq_s16_s8 (int8x16_t __a
)
3262 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
3265 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3266 vreinterpretq_s16_s32 (int32x4_t __a
)
3268 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
3271 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3272 vreinterpretq_s16_s64 (int64x2_t __a
)
3274 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
3277 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3278 vreinterpretq_s16_f32 (float32x4_t __a
)
3280 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
3283 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3284 vreinterpretq_s16_u8 (uint8x16_t __a
)
3286 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
) __a
);
3289 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3290 vreinterpretq_s16_u16 (uint16x8_t __a
)
3292 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3295 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3296 vreinterpretq_s16_u32 (uint32x4_t __a
)
3298 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
3301 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3302 vreinterpretq_s16_u64 (uint64x2_t __a
)
3304 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
3307 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3308 vreinterpretq_s16_p8 (poly8x16_t __a
)
3310 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
) __a
);
3313 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3314 vreinterpretq_s16_p16 (poly16x8_t __a
)
3316 return (int16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3319 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3320 vreinterpret_s32_s8 (int8x8_t __a
)
3322 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi (__a
);
3325 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3326 vreinterpret_s32_s16 (int16x4_t __a
)
3328 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi (__a
);
3331 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3332 vreinterpret_s32_s64 (int64x1_t __a
)
3334 return (int32x2_t
) __builtin_aarch64_reinterpretv2sidi (__a
);
3337 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3338 vreinterpret_s32_f32 (float32x2_t __a
)
3340 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv2sf (__a
);
3343 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3344 vreinterpret_s32_u8 (uint8x8_t __a
)
3346 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3349 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3350 vreinterpret_s32_u16 (uint16x4_t __a
)
3352 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3355 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3356 vreinterpret_s32_u32 (uint32x2_t __a
)
3358 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t
) __a
);
3361 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3362 vreinterpret_s32_u64 (uint64x1_t __a
)
3364 return (int32x2_t
) __builtin_aarch64_reinterpretv2sidi ((int64x1_t
) __a
);
3367 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3368 vreinterpret_s32_p8 (poly8x8_t __a
)
3370 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3373 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3374 vreinterpret_s32_p16 (poly16x4_t __a
)
3376 return (int32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3379 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3380 vreinterpretq_s32_s8 (int8x16_t __a
)
3382 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi (__a
);
3385 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3386 vreinterpretq_s32_s16 (int16x8_t __a
)
3388 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi (__a
);
3391 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3392 vreinterpretq_s32_s64 (int64x2_t __a
)
3394 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv2di (__a
);
3397 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3398 vreinterpretq_s32_f32 (float32x4_t __a
)
3400 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv4sf (__a
);
3403 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3404 vreinterpretq_s32_u8 (uint8x16_t __a
)
3406 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
) __a
);
3409 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3410 vreinterpretq_s32_u16 (uint16x8_t __a
)
3412 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3415 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3416 vreinterpretq_s32_u32 (uint32x4_t __a
)
3418 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t
) __a
);
3421 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3422 vreinterpretq_s32_u64 (uint64x2_t __a
)
3424 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t
) __a
);
3427 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3428 vreinterpretq_s32_p8 (poly8x16_t __a
)
3430 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
) __a
);
3433 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3434 vreinterpretq_s32_p16 (poly16x8_t __a
)
3436 return (int32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3439 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3440 vreinterpret_u8_s8 (int8x8_t __a
)
3442 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi (__a
);
3445 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3446 vreinterpret_u8_s16 (int16x4_t __a
)
3448 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi (__a
);
3451 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3452 vreinterpret_u8_s32 (int32x2_t __a
)
3454 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2si (__a
);
3457 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3458 vreinterpret_u8_s64 (int64x1_t __a
)
3460 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qidi (__a
);
3463 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3464 vreinterpret_u8_f32 (float32x2_t __a
)
3466 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2sf (__a
);
3469 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3470 vreinterpret_u8_u16 (uint16x4_t __a
)
3472 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3475 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3476 vreinterpret_u8_u32 (uint32x2_t __a
)
3478 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t
) __a
);
3481 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3482 vreinterpret_u8_u64 (uint64x1_t __a
)
3484 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qidi ((int64x1_t
) __a
);
3487 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3488 vreinterpret_u8_p8 (poly8x8_t __a
)
3490 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t
) __a
);
3493 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3494 vreinterpret_u8_p16 (poly16x4_t __a
)
3496 return (uint8x8_t
) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t
) __a
);
3499 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3500 vreinterpretq_u8_s8 (int8x16_t __a
)
3502 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi (__a
);
3505 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3506 vreinterpretq_u8_s16 (int16x8_t __a
)
3508 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi (__a
);
3511 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3512 vreinterpretq_u8_s32 (int32x4_t __a
)
3514 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4si (__a
);
3517 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3518 vreinterpretq_u8_s64 (int64x2_t __a
)
3520 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv2di (__a
);
3523 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3524 vreinterpretq_u8_f32 (float32x4_t __a
)
3526 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4sf (__a
);
3529 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3530 vreinterpretq_u8_u16 (uint16x8_t __a
)
3532 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
3536 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3537 vreinterpretq_u8_u32 (uint32x4_t __a
)
3539 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t
)
3543 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3544 vreinterpretq_u8_u64 (uint64x2_t __a
)
3546 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t
)
3550 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3551 vreinterpretq_u8_p8 (poly8x16_t __a
)
3553 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t
)
3557 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3558 vreinterpretq_u8_p16 (poly16x8_t __a
)
3560 return (uint8x16_t
) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t
)
3564 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3565 vreinterpret_u16_s8 (int8x8_t __a
)
3567 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi (__a
);
3570 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3571 vreinterpret_u16_s16 (int16x4_t __a
)
3573 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi (__a
);
3576 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3577 vreinterpret_u16_s32 (int32x2_t __a
)
3579 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2si (__a
);
3582 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3583 vreinterpret_u16_s64 (int64x1_t __a
)
3585 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hidi (__a
);
3588 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3589 vreinterpret_u16_f32 (float32x2_t __a
)
3591 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2sf (__a
);
3594 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3595 vreinterpret_u16_u8 (uint8x8_t __a
)
3597 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3600 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3601 vreinterpret_u16_u32 (uint32x2_t __a
)
3603 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t
) __a
);
3606 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3607 vreinterpret_u16_u64 (uint64x1_t __a
)
3609 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hidi ((int64x1_t
) __a
);
3612 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3613 vreinterpret_u16_p8 (poly8x8_t __a
)
3615 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t
) __a
);
3618 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3619 vreinterpret_u16_p16 (poly16x4_t __a
)
3621 return (uint16x4_t
) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t
) __a
);
3624 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3625 vreinterpretq_u16_s8 (int8x16_t __a
)
3627 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi (__a
);
3630 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3631 vreinterpretq_u16_s16 (int16x8_t __a
)
3633 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi (__a
);
3636 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3637 vreinterpretq_u16_s32 (int32x4_t __a
)
3639 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4si (__a
);
3642 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3643 vreinterpretq_u16_s64 (int64x2_t __a
)
3645 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv2di (__a
);
3648 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3649 vreinterpretq_u16_f32 (float32x4_t __a
)
3651 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4sf (__a
);
3654 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3655 vreinterpretq_u16_u8 (uint8x16_t __a
)
3657 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
3661 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3662 vreinterpretq_u16_u32 (uint32x4_t __a
)
3664 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t
) __a
);
3667 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3668 vreinterpretq_u16_u64 (uint64x2_t __a
)
3670 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t
) __a
);
3673 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3674 vreinterpretq_u16_p8 (poly8x16_t __a
)
3676 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t
)
3680 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3681 vreinterpretq_u16_p16 (poly16x8_t __a
)
3683 return (uint16x8_t
) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t
) __a
);
3686 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3687 vreinterpret_u32_s8 (int8x8_t __a
)
3689 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi (__a
);
3692 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3693 vreinterpret_u32_s16 (int16x4_t __a
)
3695 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi (__a
);
3698 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3699 vreinterpret_u32_s32 (int32x2_t __a
)
3701 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv2si (__a
);
3704 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3705 vreinterpret_u32_s64 (int64x1_t __a
)
3707 return (uint32x2_t
) __builtin_aarch64_reinterpretv2sidi (__a
);
3710 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3711 vreinterpret_u32_f32 (float32x2_t __a
)
3713 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv2sf (__a
);
3716 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3717 vreinterpret_u32_u8 (uint8x8_t __a
)
3719 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3722 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3723 vreinterpret_u32_u16 (uint16x4_t __a
)
3725 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3728 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3729 vreinterpret_u32_u64 (uint64x1_t __a
)
3731 return (uint32x2_t
) __builtin_aarch64_reinterpretv2sidi ((int64x1_t
) __a
);
3734 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3735 vreinterpret_u32_p8 (poly8x8_t __a
)
3737 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t
) __a
);
3740 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3741 vreinterpret_u32_p16 (poly16x4_t __a
)
3743 return (uint32x2_t
) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t
) __a
);
3746 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3747 vreinterpretq_u32_s8 (int8x16_t __a
)
3749 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi (__a
);
3752 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3753 vreinterpretq_u32_s16 (int16x8_t __a
)
3755 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi (__a
);
3758 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3759 vreinterpretq_u32_s32 (int32x4_t __a
)
3761 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv4si (__a
);
3764 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3765 vreinterpretq_u32_s64 (int64x2_t __a
)
3767 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv2di (__a
);
3770 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3771 vreinterpretq_u32_f32 (float32x4_t __a
)
3773 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv4sf (__a
);
3776 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3777 vreinterpretq_u32_u8 (uint8x16_t __a
)
3779 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
)
3783 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3784 vreinterpretq_u32_u16 (uint16x8_t __a
)
3786 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3789 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3790 vreinterpretq_u32_u64 (uint64x2_t __a
)
3792 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t
) __a
);
3795 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3796 vreinterpretq_u32_p8 (poly8x16_t __a
)
3798 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t
)
3802 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3803 vreinterpretq_u32_p16 (poly16x8_t __a
)
3805 return (uint32x4_t
) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t
) __a
);
3808 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
3809 vcombine_s8 (int8x8_t __a
, int8x8_t __b
)
3811 return (int8x16_t
) __builtin_aarch64_combinev8qi (__a
, __b
);
3814 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3815 vcombine_s16 (int16x4_t __a
, int16x4_t __b
)
3817 return (int16x8_t
) __builtin_aarch64_combinev4hi (__a
, __b
);
3820 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3821 vcombine_s32 (int32x2_t __a
, int32x2_t __b
)
3823 return (int32x4_t
) __builtin_aarch64_combinev2si (__a
, __b
);
3826 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
3827 vcombine_s64 (int64x1_t __a
, int64x1_t __b
)
3829 return (int64x2_t
) __builtin_aarch64_combinedi (__a
, __b
);
3832 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
3833 vcombine_f32 (float32x2_t __a
, float32x2_t __b
)
3835 return (float32x4_t
) __builtin_aarch64_combinev2sf (__a
, __b
);
3838 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
3839 vcombine_u8 (uint8x8_t __a
, uint8x8_t __b
)
3841 return (uint8x16_t
) __builtin_aarch64_combinev8qi ((int8x8_t
) __a
,
3845 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3846 vcombine_u16 (uint16x4_t __a
, uint16x4_t __b
)
3848 return (uint16x8_t
) __builtin_aarch64_combinev4hi ((int16x4_t
) __a
,
3852 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3853 vcombine_u32 (uint32x2_t __a
, uint32x2_t __b
)
3855 return (uint32x4_t
) __builtin_aarch64_combinev2si ((int32x2_t
) __a
,
3859 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
3860 vcombine_u64 (uint64x1_t __a
, uint64x1_t __b
)
3862 return (uint64x2_t
) __builtin_aarch64_combinedi ((int64x1_t
) __a
,
3866 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
3867 vcombine_f64 (float64x1_t __a
, float64x1_t __b
)
3869 return (float64x2_t
) __builtin_aarch64_combinedf (__a
, __b
);
3872 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
3873 vcombine_p8 (poly8x8_t __a
, poly8x8_t __b
)
3875 return (poly8x16_t
) __builtin_aarch64_combinev8qi ((int8x8_t
) __a
,
3879 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
3880 vcombine_p16 (poly16x4_t __a
, poly16x4_t __b
)
3882 return (poly16x8_t
) __builtin_aarch64_combinev4hi ((int16x4_t
) __a
,
/* Start of temporary inline asm implementations.  */
3888 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
3889 vaba_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
3892 __asm__ ("saba %0.8b,%2.8b,%3.8b"
3894 : "0"(a
), "w"(b
), "w"(c
)
3895 : /* No clobbers */);
3899 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
3900 vaba_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
3903 __asm__ ("saba %0.4h,%2.4h,%3.4h"
3905 : "0"(a
), "w"(b
), "w"(c
)
3906 : /* No clobbers */);
3910 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
3911 vaba_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
3914 __asm__ ("saba %0.2s,%2.2s,%3.2s"
3916 : "0"(a
), "w"(b
), "w"(c
)
3917 : /* No clobbers */);
3921 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
3922 vaba_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
3925 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3927 : "0"(a
), "w"(b
), "w"(c
)
3928 : /* No clobbers */);
3932 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
3933 vaba_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
3936 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3938 : "0"(a
), "w"(b
), "w"(c
)
3939 : /* No clobbers */);
3943 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
3944 vaba_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
3947 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
3949 : "0"(a
), "w"(b
), "w"(c
)
3950 : /* No clobbers */);
3954 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
3955 vabal_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
3958 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
3960 : "0"(a
), "w"(b
), "w"(c
)
3961 : /* No clobbers */);
3965 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
3966 vabal_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
3969 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
3971 : "0"(a
), "w"(b
), "w"(c
)
3972 : /* No clobbers */);
3976 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
3977 vabal_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
3980 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
3982 : "0"(a
), "w"(b
), "w"(c
)
3983 : /* No clobbers */);
3987 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
3988 vabal_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
3991 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
3993 : "0"(a
), "w"(b
), "w"(c
)
3994 : /* No clobbers */);
3998 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
3999 vabal_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
4002 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4004 : "0"(a
), "w"(b
), "w"(c
)
4005 : /* No clobbers */);
4009 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4010 vabal_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
4013 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4015 : "0"(a
), "w"(b
), "w"(c
)
4016 : /* No clobbers */);
4020 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4021 vabal_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
4024 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4026 : "0"(a
), "w"(b
), "w"(c
)
4027 : /* No clobbers */);
4031 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4032 vabal_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
4035 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4037 : "0"(a
), "w"(b
), "w"(c
)
4038 : /* No clobbers */);
4042 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4043 vabal_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
4046 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4048 : "0"(a
), "w"(b
), "w"(c
)
4049 : /* No clobbers */);
4053 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4054 vabal_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
4057 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4059 : "0"(a
), "w"(b
), "w"(c
)
4060 : /* No clobbers */);
4064 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4065 vabal_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
4068 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4070 : "0"(a
), "w"(b
), "w"(c
)
4071 : /* No clobbers */);
4075 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4076 vabal_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
4079 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4081 : "0"(a
), "w"(b
), "w"(c
)
4082 : /* No clobbers */);
4086 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4087 vabaq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
4090 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4092 : "0"(a
), "w"(b
), "w"(c
)
4093 : /* No clobbers */);
4097 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4098 vabaq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
4101 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4103 : "0"(a
), "w"(b
), "w"(c
)
4104 : /* No clobbers */);
4108 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4109 vabaq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
4112 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4114 : "0"(a
), "w"(b
), "w"(c
)
4115 : /* No clobbers */);
4119 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
4120 vabaq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
4123 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4125 : "0"(a
), "w"(b
), "w"(c
)
4126 : /* No clobbers */);
4130 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4131 vabaq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
4134 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4136 : "0"(a
), "w"(b
), "w"(c
)
4137 : /* No clobbers */);
4141 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4142 vabaq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
4145 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4147 : "0"(a
), "w"(b
), "w"(c
)
4148 : /* No clobbers */);
4152 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
4153 vabd_f32 (float32x2_t a
, float32x2_t b
)
4156 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4159 : /* No clobbers */);
4163 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4164 vabd_s8 (int8x8_t a
, int8x8_t b
)
4167 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4170 : /* No clobbers */);
4174 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4175 vabd_s16 (int16x4_t a
, int16x4_t b
)
4178 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4181 : /* No clobbers */);
4185 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4186 vabd_s32 (int32x2_t a
, int32x2_t b
)
4189 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4192 : /* No clobbers */);
4196 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
4197 vabd_u8 (uint8x8_t a
, uint8x8_t b
)
4200 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4203 : /* No clobbers */);
4207 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
4208 vabd_u16 (uint16x4_t a
, uint16x4_t b
)
4211 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4214 : /* No clobbers */);
4218 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
4219 vabd_u32 (uint32x2_t a
, uint32x2_t b
)
4222 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4225 : /* No clobbers */);
4229 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
4230 vabdd_f64 (float64_t a
, float64_t b
)
4233 __asm__ ("fabd %d0, %d1, %d2"
4236 : /* No clobbers */);
4240 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4241 vabdl_high_s8 (int8x16_t a
, int8x16_t b
)
4244 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4247 : /* No clobbers */);
4251 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4252 vabdl_high_s16 (int16x8_t a
, int16x8_t b
)
4255 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4258 : /* No clobbers */);
4262 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4263 vabdl_high_s32 (int32x4_t a
, int32x4_t b
)
4266 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4269 : /* No clobbers */);
4273 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4274 vabdl_high_u8 (uint8x16_t a
, uint8x16_t b
)
4277 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4280 : /* No clobbers */);
4284 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4285 vabdl_high_u16 (uint16x8_t a
, uint16x8_t b
)
4288 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4291 : /* No clobbers */);
4295 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4296 vabdl_high_u32 (uint32x4_t a
, uint32x4_t b
)
4299 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4302 : /* No clobbers */);
4306 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4307 vabdl_s8 (int8x8_t a
, int8x8_t b
)
4310 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4313 : /* No clobbers */);
4317 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4318 vabdl_s16 (int16x4_t a
, int16x4_t b
)
4321 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4324 : /* No clobbers */);
4328 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4329 vabdl_s32 (int32x2_t a
, int32x2_t b
)
4332 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4335 : /* No clobbers */);
4339 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4340 vabdl_u8 (uint8x8_t a
, uint8x8_t b
)
4343 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4346 : /* No clobbers */);
4350 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4351 vabdl_u16 (uint16x4_t a
, uint16x4_t b
)
4354 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4357 : /* No clobbers */);
4361 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
4362 vabdl_u32 (uint32x2_t a
, uint32x2_t b
)
4365 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4368 : /* No clobbers */);
4372 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
4373 vabdq_f32 (float32x4_t a
, float32x4_t b
)
4376 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4379 : /* No clobbers */);
4383 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
4384 vabdq_f64 (float64x2_t a
, float64x2_t b
)
4387 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4390 : /* No clobbers */);
4394 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4395 vabdq_s8 (int8x16_t a
, int8x16_t b
)
4398 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4401 : /* No clobbers */);
4405 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4406 vabdq_s16 (int16x8_t a
, int16x8_t b
)
4409 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4412 : /* No clobbers */);
4416 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4417 vabdq_s32 (int32x4_t a
, int32x4_t b
)
4420 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4423 : /* No clobbers */);
4427 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
4428 vabdq_u8 (uint8x16_t a
, uint8x16_t b
)
4431 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4434 : /* No clobbers */);
4438 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
4439 vabdq_u16 (uint16x8_t a
, uint16x8_t b
)
4442 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4445 : /* No clobbers */);
4449 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
4450 vabdq_u32 (uint32x4_t a
, uint32x4_t b
)
4453 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4456 : /* No clobbers */);
4460 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
4461 vabds_f32 (float32_t a
, float32_t b
)
4464 __asm__ ("fabd %s0, %s1, %s2"
4467 : /* No clobbers */);
4471 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4472 vabs_s8 (int8x8_t a
)
4475 __asm__ ("abs %0.8b,%1.8b"
4478 : /* No clobbers */);
4482 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4483 vabs_s16 (int16x4_t a
)
4486 __asm__ ("abs %0.4h,%1.4h"
4489 : /* No clobbers */);
4493 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4494 vabs_s32 (int32x2_t a
)
4497 __asm__ ("abs %0.2s,%1.2s"
4500 : /* No clobbers */);
4504 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4505 vabsq_s8 (int8x16_t a
)
4508 __asm__ ("abs %0.16b,%1.16b"
4511 : /* No clobbers */);
4515 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4516 vabsq_s16 (int16x8_t a
)
4519 __asm__ ("abs %0.8h,%1.8h"
4522 : /* No clobbers */);
4526 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
4527 vabsq_s32 (int32x4_t a
)
4530 __asm__ ("abs %0.4s,%1.4s"
4533 : /* No clobbers */);
4537 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
4538 vabsq_s64 (int64x2_t a
)
4541 __asm__ ("abs %0.2d,%1.2d"
4544 : /* No clobbers */);
4548 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
4549 vacged_f64 (float64_t a
, float64_t b
)
4552 __asm__ ("facge %d0,%d1,%d2"
4555 : /* No clobbers */);
4559 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
4560 vacges_f32 (float32_t a
, float32_t b
)
4563 __asm__ ("facge %s0,%s1,%s2"
4566 : /* No clobbers */);
4570 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
4571 vacgtd_f64 (float64_t a
, float64_t b
)
4574 __asm__ ("facgt %d0,%d1,%d2"
4577 : /* No clobbers */);
4581 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
4582 vacgts_f32 (float32_t a
, float32_t b
)
4585 __asm__ ("facgt %s0,%s1,%s2"
4588 : /* No clobbers */);
4592 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4593 vaddlv_s8 (int8x8_t a
)
4596 __asm__ ("saddlv %h0,%1.8b"
4599 : /* No clobbers */);
4603 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
4604 vaddlv_s16 (int16x4_t a
)
4607 __asm__ ("saddlv %s0,%1.4h"
4610 : /* No clobbers */);
4614 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4615 vaddlv_u8 (uint8x8_t a
)
4618 __asm__ ("uaddlv %h0,%1.8b"
4621 : /* No clobbers */);
4625 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
4626 vaddlv_u16 (uint16x4_t a
)
4629 __asm__ ("uaddlv %s0,%1.4h"
4632 : /* No clobbers */);
4636 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4637 vaddlvq_s8 (int8x16_t a
)
4640 __asm__ ("saddlv %h0,%1.16b"
4643 : /* No clobbers */);
4647 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
4648 vaddlvq_s16 (int16x8_t a
)
4651 __asm__ ("saddlv %s0,%1.8h"
4654 : /* No clobbers */);
4658 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
4659 vaddlvq_s32 (int32x4_t a
)
4662 __asm__ ("saddlv %d0,%1.4s"
4665 : /* No clobbers */);
4669 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4670 vaddlvq_u8 (uint8x16_t a
)
4673 __asm__ ("uaddlv %h0,%1.16b"
4676 : /* No clobbers */);
4680 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
4681 vaddlvq_u16 (uint16x8_t a
)
4684 __asm__ ("uaddlv %s0,%1.8h"
4687 : /* No clobbers */);
4691 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
4692 vaddlvq_u32 (uint32x4_t a
)
4695 __asm__ ("uaddlv %d0,%1.4s"
4698 : /* No clobbers */);
4702 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
4703 vaddv_s8 (int8x8_t a
)
4706 __asm__ ("addv %b0,%1.8b"
4709 : /* No clobbers */);
4713 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4714 vaddv_s16 (int16x4_t a
)
4717 __asm__ ("addv %h0,%1.4h"
4720 : /* No clobbers */);
4724 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
4725 vaddv_u8 (uint8x8_t a
)
4728 __asm__ ("addv %b0,%1.8b"
4731 : /* No clobbers */);
4735 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4736 vaddv_u16 (uint16x4_t a
)
4739 __asm__ ("addv %h0,%1.4h"
4742 : /* No clobbers */);
4746 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
4747 vaddvq_s8 (int8x16_t a
)
4750 __asm__ ("addv %b0,%1.16b"
4753 : /* No clobbers */);
4757 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
4758 vaddvq_s16 (int16x8_t a
)
4761 __asm__ ("addv %h0,%1.8h"
4764 : /* No clobbers */);
4768 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
4769 vaddvq_s32 (int32x4_t a
)
4772 __asm__ ("addv %s0,%1.4s"
4775 : /* No clobbers */);
4779 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
4780 vaddvq_u8 (uint8x16_t a
)
4783 __asm__ ("addv %b0,%1.16b"
4786 : /* No clobbers */);
4790 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
4791 vaddvq_u16 (uint16x8_t a
)
4794 __asm__ ("addv %h0,%1.8h"
4797 : /* No clobbers */);
4801 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
4802 vaddvq_u32 (uint32x4_t a
)
4805 __asm__ ("addv %s0,%1.4s"
4808 : /* No clobbers */);
4812 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
4813 vbsl_f32 (uint32x2_t a
, float32x2_t b
, float32x2_t c
)
4816 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4818 : "0"(a
), "w"(b
), "w"(c
)
4819 : /* No clobbers */);
4823 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
4824 vbsl_p8 (uint8x8_t a
, poly8x8_t b
, poly8x8_t c
)
4827 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4829 : "0"(a
), "w"(b
), "w"(c
)
4830 : /* No clobbers */);
4834 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
4835 vbsl_p16 (uint16x4_t a
, poly16x4_t b
, poly16x4_t c
)
4838 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4840 : "0"(a
), "w"(b
), "w"(c
)
4841 : /* No clobbers */);
4845 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
4846 vbsl_s8 (uint8x8_t a
, int8x8_t b
, int8x8_t c
)
4849 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4851 : "0"(a
), "w"(b
), "w"(c
)
4852 : /* No clobbers */);
4856 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
4857 vbsl_s16 (uint16x4_t a
, int16x4_t b
, int16x4_t c
)
4860 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4862 : "0"(a
), "w"(b
), "w"(c
)
4863 : /* No clobbers */);
4867 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
4868 vbsl_s32 (uint32x2_t a
, int32x2_t b
, int32x2_t c
)
4871 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4873 : "0"(a
), "w"(b
), "w"(c
)
4874 : /* No clobbers */);
4878 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
4879 vbsl_s64 (uint64x1_t a
, int64x1_t b
, int64x1_t c
)
4882 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4884 : "0"(a
), "w"(b
), "w"(c
)
4885 : /* No clobbers */);
4889 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
4890 vbsl_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
4893 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4895 : "0"(a
), "w"(b
), "w"(c
)
4896 : /* No clobbers */);
4900 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
4901 vbsl_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
4904 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4906 : "0"(a
), "w"(b
), "w"(c
)
4907 : /* No clobbers */);
4911 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
4912 vbsl_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
4915 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4917 : "0"(a
), "w"(b
), "w"(c
)
4918 : /* No clobbers */);
4922 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
4923 vbsl_u64 (uint64x1_t a
, uint64x1_t b
, uint64x1_t c
)
4926 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4928 : "0"(a
), "w"(b
), "w"(c
)
4929 : /* No clobbers */);
/* vbslq_<type>: bitwise-select wrappers for 16-byte vectors.  All variants
   issue "bsl" on the .16b arrangement; only the C types in the signature
   differ.  The mask a is always an unsigned vector type and is tied to
   operand %0 by the "0" matching constraint; b and c are passed with "w"
   constraints as %2 and %3.
   NOTE(review): braces, result declaration, output operand and return are
   absent from this listing -- confirm against the complete header.  */
4933 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
4934 vbslq_f32 (uint32x4_t a
, float32x4_t b
, float32x4_t c
)
4937 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4939 : "0"(a
), "w"(b
), "w"(c
)
4940 : /* No clobbers */);
4944 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
4945 vbslq_f64 (uint64x2_t a
, float64x2_t b
, float64x2_t c
)
4948 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4950 : "0"(a
), "w"(b
), "w"(c
)
4951 : /* No clobbers */);
4955 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
4956 vbslq_p8 (uint8x16_t a
, poly8x16_t b
, poly8x16_t c
)
4959 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4961 : "0"(a
), "w"(b
), "w"(c
)
4962 : /* No clobbers */);
4966 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
4967 vbslq_p16 (uint16x8_t a
, poly16x8_t b
, poly16x8_t c
)
4970 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4972 : "0"(a
), "w"(b
), "w"(c
)
4973 : /* No clobbers */);
4977 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
4978 vbslq_s8 (uint8x16_t a
, int8x16_t b
, int8x16_t c
)
4981 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4983 : "0"(a
), "w"(b
), "w"(c
)
4984 : /* No clobbers */);
4988 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
4989 vbslq_s16 (uint16x8_t a
, int16x8_t b
, int16x8_t c
)
4992 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4994 : "0"(a
), "w"(b
), "w"(c
)
4995 : /* No clobbers */);
4999 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
5000 vbslq_s32 (uint32x4_t a
, int32x4_t b
, int32x4_t c
)
5003 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5005 : "0"(a
), "w"(b
), "w"(c
)
5006 : /* No clobbers */);
5010 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
5011 vbslq_s64 (uint64x2_t a
, int64x2_t b
, int64x2_t c
)
5014 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5016 : "0"(a
), "w"(b
), "w"(c
)
5017 : /* No clobbers */);
5021 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
5022 vbslq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
5025 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5027 : "0"(a
), "w"(b
), "w"(c
)
5028 : /* No clobbers */);
5032 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
5033 vbslq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
5036 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5038 : "0"(a
), "w"(b
), "w"(c
)
5039 : /* No clobbers */);
5043 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5044 vbslq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
5047 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5049 : "0"(a
), "w"(b
), "w"(c
)
5050 : /* No clobbers */);
5054 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5055 vbslq_u64 (uint64x2_t a
, uint64x2_t b
, uint64x2_t c
)
5058 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
5060 : "0"(a
), "w"(b
), "w"(c
)
5061 : /* No clobbers */);
/* Floating-point absolute-value comparisons.  vcage* and vcagt* emit "facge"
   and "facgt" with the source operands in the order %1, %2.  vcale* and
   vcalt* have no opcode of their own: they reuse "facge" / "facgt" with the
   two sources swapped (%2, %1).  All variants are declared to return an
   unsigned vector type.
   NOTE(review): the operand lists are missing from this listing, so the
   mapping of %1/%2 onto a/b is presumed rather than shown -- confirm.  */
5065 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5066 vcage_f32 (float32x2_t a
, float32x2_t b
)
5069 __asm__ ("facge %0.2s, %1.2s, %2.2s"
5072 : /* No clobbers */);
5076 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5077 vcageq_f32 (float32x4_t a
, float32x4_t b
)
5080 __asm__ ("facge %0.4s, %1.4s, %2.4s"
5083 : /* No clobbers */);
5087 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5088 vcageq_f64 (float64x2_t a
, float64x2_t b
)
5091 __asm__ ("facge %0.2d, %1.2d, %2.2d"
5094 : /* No clobbers */);
5098 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5099 vcagt_f32 (float32x2_t a
, float32x2_t b
)
5102 __asm__ ("facgt %0.2s, %1.2s, %2.2s"
5105 : /* No clobbers */);
5109 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5110 vcagtq_f32 (float32x4_t a
, float32x4_t b
)
5113 __asm__ ("facgt %0.4s, %1.4s, %2.4s"
5116 : /* No clobbers */);
5120 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5121 vcagtq_f64 (float64x2_t a
, float64x2_t b
)
5124 __asm__ ("facgt %0.2d, %1.2d, %2.2d"
5127 : /* No clobbers */);
/* From here on the templates name the sources as %2 then %1, turning the
   greater-or-equal / greater-than instructions into the le / lt forms.  */
5131 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5132 vcale_f32 (float32x2_t a
, float32x2_t b
)
5135 __asm__ ("facge %0.2s, %2.2s, %1.2s"
5138 : /* No clobbers */);
5142 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5143 vcaleq_f32 (float32x4_t a
, float32x4_t b
)
5146 __asm__ ("facge %0.4s, %2.4s, %1.4s"
5149 : /* No clobbers */);
5153 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5154 vcaleq_f64 (float64x2_t a
, float64x2_t b
)
5157 __asm__ ("facge %0.2d, %2.2d, %1.2d"
5160 : /* No clobbers */);
5164 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5165 vcalt_f32 (float32x2_t a
, float32x2_t b
)
5168 __asm__ ("facgt %0.2s, %2.2s, %1.2s"
5171 : /* No clobbers */);
5175 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5176 vcaltq_f32 (float32x4_t a
, float32x4_t b
)
5179 __asm__ ("facgt %0.4s, %2.4s, %1.4s"
5182 : /* No clobbers */);
5186 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5187 vcaltq_f64 (float64x2_t a
, float64x2_t b
)
5190 __asm__ ("facgt %0.2d, %2.2d, %1.2d"
5193 : /* No clobbers */);
/* Floating-point equality comparisons, all built on "fcmeq".  The vector
   forms use .2s / .4s / .2d arrangements; vceq_f64 and the scalar forms use
   the "%d" / "%s" operand modifiers instead of an arrangement suffix.  The
   vceqz* forms compare a single input against the immediate #0.
   NOTE(review): the operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
5197 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5198 vceq_f32 (float32x2_t a
, float32x2_t b
)
5201 __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
5204 : /* No clobbers */);
5208 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5209 vceq_f64 (float64x1_t a
, float64x1_t b
)
5212 __asm__ ("fcmeq %d0, %d1, %d2"
5215 : /* No clobbers */);
/* NOTE(review): unlike vceq_f64 above, this scalar form is declared to
   return float64_t although it wraps a compare -- confirm this is the
   intended return type.  */
5219 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
5220 vceqd_f64 (float64_t a
, float64_t b
)
5223 __asm__ ("fcmeq %d0,%d1,%d2"
5226 : /* No clobbers */);
5230 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5231 vceqq_f32 (float32x4_t a
, float32x4_t b
)
5234 __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
5237 : /* No clobbers */);
5241 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5242 vceqq_f64 (float64x2_t a
, float64x2_t b
)
5245 __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
5248 : /* No clobbers */);
/* NOTE(review): declared to return float32_t, same concern as vceqd_f64.  */
5252 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
5253 vceqs_f32 (float32_t a
, float32_t b
)
5256 __asm__ ("fcmeq %s0,%s1,%s2"
5259 : /* No clobbers */);
/* Compare-with-zero scalar forms: one input, second source is literal #0.
   NOTE(review): both are declared with floating-point return types --
   confirm.  */
5263 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
5264 vceqzd_f64 (float64_t a
)
5267 __asm__ ("fcmeq %d0,%d1,#0"
5270 : /* No clobbers */);
5274 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
5275 vceqzs_f32 (float32_t a
)
5278 __asm__ ("fcmeq %s0,%s1,#0"
5281 : /* No clobbers */);
/* Floating-point ordered comparisons.  vcge* emits "fcmge" and vcgt* emits
   "fcmgt", both with sources in the order %1, %2.  vcle* is written as
   "fcmge" with the sources swapped (%2, %1).  The _f64 (non-q) variants use
   the "%d" operand modifier instead of a vector arrangement.  All variants
   are declared to return an unsigned type of matching width.
   NOTE(review): operand lists are missing from this listing, so which of
   a/b is %1 is presumed -- confirm.  */
5285 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5286 vcge_f32 (float32x2_t a
, float32x2_t b
)
5289 __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
5292 : /* No clobbers */);
5296 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5297 vcge_f64 (float64x1_t a
, float64x1_t b
)
5300 __asm__ ("fcmge %d0, %d1, %d2"
5303 : /* No clobbers */);
5307 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5308 vcgeq_f32 (float32x4_t a
, float32x4_t b
)
5311 __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
5314 : /* No clobbers */);
5318 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5319 vcgeq_f64 (float64x2_t a
, float64x2_t b
)
5322 __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
5325 : /* No clobbers */);
5329 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5330 vcgt_f32 (float32x2_t a
, float32x2_t b
)
5333 __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
5336 : /* No clobbers */);
5340 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5341 vcgt_f64 (float64x1_t a
, float64x1_t b
)
5344 __asm__ ("fcmgt %d0, %d1, %d2"
5347 : /* No clobbers */);
5351 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5352 vcgtq_f32 (float32x4_t a
, float32x4_t b
)
5355 __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
5358 : /* No clobbers */);
5362 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5363 vcgtq_f64 (float64x2_t a
, float64x2_t b
)
5366 __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
5369 : /* No clobbers */);
/* vcle*: "fcmge" with the two sources exchanged in the template.  */
5373 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5374 vcle_f32 (float32x2_t a
, float32x2_t b
)
5377 __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
5380 : /* No clobbers */);
5384 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5385 vcle_f64 (float64x1_t a
, float64x1_t b
)
5388 __asm__ ("fcmge %d0, %d2, %d1"
5391 : /* No clobbers */);
5395 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5396 vcleq_f32 (float32x4_t a
, float32x4_t b
)
5399 __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
5402 : /* No clobbers */);
5406 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5407 vcleq_f64 (float64x2_t a
, float64x2_t b
)
5410 __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
5413 : /* No clobbers */);
/* vcls* / vclsq*: one-input wrappers around the "cls" instruction for signed
   8-, 16- and 32-bit elements, in 8-byte (.8b/.4h/.2s) and 16-byte
   (.16b/.8h/.4s) arrangements.  Each returns the same vector type it takes.
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
5417 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
5418 vcls_s8 (int8x8_t a
)
5421 __asm__ ("cls %0.8b,%1.8b"
5424 : /* No clobbers */);
5428 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
5429 vcls_s16 (int16x4_t a
)
5432 __asm__ ("cls %0.4h,%1.4h"
5435 : /* No clobbers */);
5439 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
5440 vcls_s32 (int32x2_t a
)
5443 __asm__ ("cls %0.2s,%1.2s"
5446 : /* No clobbers */);
5450 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
5451 vclsq_s8 (int8x16_t a
)
5454 __asm__ ("cls %0.16b,%1.16b"
5457 : /* No clobbers */);
5461 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
5462 vclsq_s16 (int16x8_t a
)
5465 __asm__ ("cls %0.8h,%1.8h"
5468 : /* No clobbers */);
5472 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
5473 vclsq_s32 (int32x4_t a
)
5476 __asm__ ("cls %0.4s,%1.4s"
5479 : /* No clobbers */);
/* vclt* / vcltq*: floating-point less-than, written as "fcmgt" with the two
   source operands swapped (%2, %1) in the template.  vclt_f64 uses the "%d"
   operand modifier; the others use vector arrangements.
   NOTE(review): operand lists are missing from this listing, so the mapping
   of %1/%2 onto a/b is presumed -- confirm.  */
5483 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5484 vclt_f32 (float32x2_t a
, float32x2_t b
)
5487 __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
5490 : /* No clobbers */);
5494 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
5495 vclt_f64 (float64x1_t a
, float64x1_t b
)
5498 __asm__ ("fcmgt %d0, %d2, %d1"
5501 : /* No clobbers */);
5505 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5506 vcltq_f32 (float32x4_t a
, float32x4_t b
)
5509 __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
5512 : /* No clobbers */);
5516 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
5517 vcltq_f64 (float64x2_t a
, float64x2_t b
)
5520 __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
5523 : /* No clobbers */);
/* vclz* / vclzq*: one-input wrappers around the "clz" instruction for signed
   and unsigned 8-, 16- and 32-bit elements.  The signed and unsigned variants
   of a given width use the identical template; only the C types differ.
   Each returns the same vector type it takes.
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
5527 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
5528 vclz_s8 (int8x8_t a
)
5531 __asm__ ("clz %0.8b,%1.8b"
5534 : /* No clobbers */);
5538 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
5539 vclz_s16 (int16x4_t a
)
5542 __asm__ ("clz %0.4h,%1.4h"
5545 : /* No clobbers */);
5549 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
5550 vclz_s32 (int32x2_t a
)
5553 __asm__ ("clz %0.2s,%1.2s"
5556 : /* No clobbers */);
5560 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
5561 vclz_u8 (uint8x8_t a
)
5564 __asm__ ("clz %0.8b,%1.8b"
5567 : /* No clobbers */);
5571 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
5572 vclz_u16 (uint16x4_t a
)
5575 __asm__ ("clz %0.4h,%1.4h"
5578 : /* No clobbers */);
5582 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
5583 vclz_u32 (uint32x2_t a
)
5586 __asm__ ("clz %0.2s,%1.2s"
5589 : /* No clobbers */);
5593 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
5594 vclzq_s8 (int8x16_t a
)
5597 __asm__ ("clz %0.16b,%1.16b"
5600 : /* No clobbers */);
5604 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
5605 vclzq_s16 (int16x8_t a
)
5608 __asm__ ("clz %0.8h,%1.8h"
5611 : /* No clobbers */);
5615 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
5616 vclzq_s32 (int32x4_t a
)
5619 __asm__ ("clz %0.4s,%1.4s"
5622 : /* No clobbers */);
5626 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
5627 vclzq_u8 (uint8x16_t a
)
5630 __asm__ ("clz %0.16b,%1.16b"
5633 : /* No clobbers */);
5637 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
5638 vclzq_u16 (uint16x8_t a
)
5641 __asm__ ("clz %0.8h,%1.8h"
5644 : /* No clobbers */);
5648 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
5649 vclzq_u32 (uint32x4_t a
)
5652 __asm__ ("clz %0.4s,%1.4s"
5655 : /* No clobbers */);
/* vcnt* / vcntq*: one-input wrappers around the "cnt" instruction.  Only
   byte-element types (poly8, int8, uint8) are provided, using the .8b
   arrangement for 8-byte vectors and .16b for 16-byte vectors.  Each returns
   the same vector type it takes.
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
5659 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
5660 vcnt_p8 (poly8x8_t a
)
5663 __asm__ ("cnt %0.8b,%1.8b"
5666 : /* No clobbers */);
5670 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
5671 vcnt_s8 (int8x8_t a
)
5674 __asm__ ("cnt %0.8b,%1.8b"
5677 : /* No clobbers */);
5681 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
5682 vcnt_u8 (uint8x8_t a
)
5685 __asm__ ("cnt %0.8b,%1.8b"
5688 : /* No clobbers */);
5692 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
5693 vcntq_p8 (poly8x16_t a
)
5696 __asm__ ("cnt %0.16b,%1.16b"
5699 : /* No clobbers */);
5703 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
5704 vcntq_s8 (int8x16_t a
)
5707 __asm__ ("cnt %0.16b,%1.16b"
5710 : /* No clobbers */);
5714 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
5715 vcntq_u8 (uint8x16_t a
)
5718 __asm__ ("cnt %0.16b,%1.16b"
5721 : /* No clobbers */);
/* vcopyq_lane_<type>(a, b, c, d): macros (not functions) because b and d are
   passed with "i" constraints, i.e. they must be compile-time constants.
   Each macro copies c and a into typed locals c_ and a_, then emits
   "ins %0.<T>[%2], %3.<T>[%4]" where <T> is b/h/s/d according to the element
   width.  a_ is tied to operand %0 by the "0" matching constraint, b is the
   lane index applied to %0, c_ is the "w" source vector and d is the lane
   index applied to it.
   NOTE(review): the statement-expression wrapper, the output-operand line,
   the value of the expression and (for the signed variants) the result
   declaration are not present in this listing -- confirm against the
   complete header.  */
5725 #define vcopyq_lane_f32(a, b, c, d) \
5728 float32x4_t c_ = (c); \
5729 float32x4_t a_ = (a); \
5730 float32x4_t result; \
5731 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5733 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5734 : /* No clobbers */); \
5738 #define vcopyq_lane_f64(a, b, c, d) \
5741 float64x2_t c_ = (c); \
5742 float64x2_t a_ = (a); \
5743 float64x2_t result; \
5744 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5746 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5747 : /* No clobbers */); \
5751 #define vcopyq_lane_p8(a, b, c, d) \
5754 poly8x16_t c_ = (c); \
5755 poly8x16_t a_ = (a); \
5756 poly8x16_t result; \
5757 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5759 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5760 : /* No clobbers */); \
5764 #define vcopyq_lane_p16(a, b, c, d) \
5767 poly16x8_t c_ = (c); \
5768 poly16x8_t a_ = (a); \
5769 poly16x8_t result; \
5770 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5772 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5773 : /* No clobbers */); \
5777 #define vcopyq_lane_s8(a, b, c, d) \
5780 int8x16_t c_ = (c); \
5781 int8x16_t a_ = (a); \
5783 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5785 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5786 : /* No clobbers */); \
5790 #define vcopyq_lane_s16(a, b, c, d) \
5793 int16x8_t c_ = (c); \
5794 int16x8_t a_ = (a); \
5796 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5798 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5799 : /* No clobbers */); \
5803 #define vcopyq_lane_s32(a, b, c, d) \
5806 int32x4_t c_ = (c); \
5807 int32x4_t a_ = (a); \
5809 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5811 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5812 : /* No clobbers */); \
5816 #define vcopyq_lane_s64(a, b, c, d) \
5819 int64x2_t c_ = (c); \
5820 int64x2_t a_ = (a); \
5822 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5824 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5825 : /* No clobbers */); \
5829 #define vcopyq_lane_u8(a, b, c, d) \
5832 uint8x16_t c_ = (c); \
5833 uint8x16_t a_ = (a); \
5834 uint8x16_t result; \
5835 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5837 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5838 : /* No clobbers */); \
5842 #define vcopyq_lane_u16(a, b, c, d) \
5845 uint16x8_t c_ = (c); \
5846 uint16x8_t a_ = (a); \
5847 uint16x8_t result; \
5848 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5850 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5851 : /* No clobbers */); \
5855 #define vcopyq_lane_u32(a, b, c, d) \
5858 uint32x4_t c_ = (c); \
5859 uint32x4_t a_ = (a); \
5860 uint32x4_t result; \
5861 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5863 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5864 : /* No clobbers */); \
5868 #define vcopyq_lane_u64(a, b, c, d) \
5871 uint64x2_t c_ = (c); \
5872 uint64x2_t a_ = (a); \
5873 uint64x2_t result; \
5874 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5876 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5877 : /* No clobbers */); \
5881 /* vcvt_f16_f32 not supported */
5883 /* vcvt_f32_f16 not supported */
/* vcvt_f32_* / vcvt_f64_*: conversions to floating point.
     vcvt_f32_f64  "fcvtn"  .2d -> .2s   (two doubles to two floats)
     vcvt_f32_s32  "scvtf"  .2s -> .2s
     vcvt_f32_u32  "ucvtf"  .2s -> .2s
     vcvt_f64_f32  "fcvtl"  .2s -> .2d   (two floats to two doubles)
     vcvt_f64_s64  "scvtf"  using the "%d" operand modifier
     vcvt_f64_u64  "ucvtf"  using the "%d" operand modifier
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
5885 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
5886 vcvt_f32_f64 (float64x2_t a
)
5889 __asm__ ("fcvtn %0.2s,%1.2d"
5892 : /* No clobbers */);
5896 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
5897 vcvt_f32_s32 (int32x2_t a
)
5900 __asm__ ("scvtf %0.2s, %1.2s"
5903 : /* No clobbers */);
5907 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
5908 vcvt_f32_u32 (uint32x2_t a
)
5911 __asm__ ("ucvtf %0.2s, %1.2s"
5914 : /* No clobbers */);
5918 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
5919 vcvt_f64_f32 (float32x2_t a
)
5922 __asm__ ("fcvtl %0.2d,%1.2s"
5925 : /* No clobbers */);
/* NOTE(review): the name says s64 and the template uses "scvtf", yet the
   parameter is declared uint64x1_t, the same type vcvt_f64_u64 takes below
   -- looks like it should be int64x1_t; confirm before relying on implicit
   conversion at call sites.  */
5929 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
5930 vcvt_f64_s64 (uint64x1_t a
)
5933 __asm__ ("scvtf %d0, %d1"
5936 : /* No clobbers */);
5940 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
5941 vcvt_f64_u64 (uint64x1_t a
)
5944 __asm__ ("ucvtf %d0, %d1"
5947 : /* No clobbers */);
5951 /* vcvt_high_f16_f32 not supported */
5953 /* vcvt_high_f32_f16 not supported */
/* Forward declaration: vcvt_high_f32_f64 below calls vdup_n_f32, and this
   prototype appears ahead of that call.  */
5955 static float32x2_t
vdup_n_f32 (float32_t
);
/* vcvt_high_f32_f64: result is first initialised to
   vcombine_f32 (a, vdup_n_f32 (0.0f)); the template then issues "fcvtn2"
   writing %0.4s from %2.2d.
   NOTE(review): presumably vcombine_f32 places a in the low half and the
   zero vector in the high half, and %0 is a read-write operand so that the
   low half survives -- neither vcombine_f32 nor the operand list is visible
   in this listing; confirm.  */
5957 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
5958 vcvt_high_f32_f64 (float32x2_t a
, float64x2_t b
)
5960 float32x4_t result
= vcombine_f32 (a
, vdup_n_f32 (0.0f
));
5961 __asm__ ("fcvtn2 %0.4s,%2.2d"
5964 : /* No clobbers */);
/* vcvt_high_f64_f32: "fcvtl2" from a .4s source to a .2d destination.  */
5968 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
5969 vcvt_high_f64_f32 (float32x4_t a
)
5972 __asm__ ("fcvtl2 %0.2d,%1.4s"
5975 : /* No clobbers */);
/* vcvt_n_<to>_<from>(a, b): 8-byte-vector conversions that take an extra
   operand b, printed into the template as the immediate "#%2".  They are
   macros rather than functions; each copies a into a typed local a_ first.
     vcvt_n_f32_s32  "scvtf"     vcvt_n_f32_u32  "ucvtf"
     vcvt_n_s32_f32  "fcvtzs"    vcvt_n_u32_f32  "fcvtzu"
   The two functions that follow the macros, vcvt_s32_f32 and vcvt_u32_f32,
   are the plain "fcvtzs" / "fcvtzu" forms with no immediate.
   NOTE(review): the statement-expression wrapper, operand lists and (for
   vcvt_n_s32_f32) the result declaration are not present in this listing;
   presumably b is passed with an "i" constraint as in the vcopyq_lane_*
   macros -- confirm.  */
5979 #define vcvt_n_f32_s32(a, b) \
5982 int32x2_t a_ = (a); \
5983 float32x2_t result; \
5984 __asm__ ("scvtf %0.2s, %1.2s, #%2" \
5987 : /* No clobbers */); \
5991 #define vcvt_n_f32_u32(a, b) \
5994 uint32x2_t a_ = (a); \
5995 float32x2_t result; \
5996 __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
5999 : /* No clobbers */); \
6003 #define vcvt_n_s32_f32(a, b) \
6006 float32x2_t a_ = (a); \
6008 __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
6011 : /* No clobbers */); \
6015 #define vcvt_n_u32_f32(a, b) \
6018 float32x2_t a_ = (a); \
6019 uint32x2_t result; \
6020 __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
6023 : /* No clobbers */); \
6027 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6028 vcvt_s32_f32 (float32x2_t a
)
6031 __asm__ ("fcvtzs %0.2s, %1.2s"
6034 : /* No clobbers */);
6038 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6039 vcvt_u32_f32 (float32x2_t a
)
6042 __asm__ ("fcvtzu %0.2s, %1.2s"
6045 : /* No clobbers */);
/* vcvta*: float-to-integer conversions built on "fcvtas" (signed result) and
   "fcvtau" (unsigned result).  Vector forms use .2s / .4s / .2d
   arrangements; the scalar d- and s-suffixed forms use the "%d" / "%s"
   operand modifiers.
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
6049 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6050 vcvta_s32_f32 (float32x2_t a
)
6053 __asm__ ("fcvtas %0.2s, %1.2s"
6056 : /* No clobbers */);
6060 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6061 vcvta_u32_f32 (float32x2_t a
)
6064 __asm__ ("fcvtau %0.2s, %1.2s"
6067 : /* No clobbers */);
/* NOTE(review): the two scalar forms below are named *_s64_f64 / *_u64_f64
   but are declared to return float64_t rather than an integer type --
   confirm.  */
6071 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6072 vcvtad_s64_f64 (float64_t a
)
6075 __asm__ ("fcvtas %d0,%d1"
6078 : /* No clobbers */);
6082 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6083 vcvtad_u64_f64 (float64_t a
)
6086 __asm__ ("fcvtau %d0,%d1"
6089 : /* No clobbers */);
6093 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6094 vcvtaq_s32_f32 (float32x4_t a
)
6097 __asm__ ("fcvtas %0.4s, %1.4s"
6100 : /* No clobbers */);
6104 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6105 vcvtaq_s64_f64 (float64x2_t a
)
6108 __asm__ ("fcvtas %0.2d, %1.2d"
6111 : /* No clobbers */);
6115 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6116 vcvtaq_u32_f32 (float32x4_t a
)
6119 __asm__ ("fcvtau %0.4s, %1.4s"
6122 : /* No clobbers */);
6126 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6127 vcvtaq_u64_f64 (float64x2_t a
)
6130 __asm__ ("fcvtau %0.2d, %1.2d"
6133 : /* No clobbers */);
/* NOTE(review): the two functions below carry s64_f64 / u64_f64 in their
   names but take and return float32_t and use the "%s" operand modifier,
   i.e. they look like the 32-bit scalar forms under a 64-bit name --
   confirm the intended name and types.  */
6137 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6138 vcvtas_s64_f64 (float32_t a
)
6141 __asm__ ("fcvtas %s0,%s1"
6144 : /* No clobbers */);
6148 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6149 vcvtas_u64_f64 (float32_t a
)
6152 __asm__ ("fcvtau %s0,%s1"
6155 : /* No clobbers */);
/* vcvtd_*: scalar 64-bit conversions, all using the "%d" operand modifier.
     vcvtd_f64_s64 / vcvtd_f64_u64       "scvtf" / "ucvtf"
     vcvtd_n_f64_s64 / vcvtd_n_f64_u64   same, with an extra operand %2
     vcvtd_n_s64_f64 / vcvtd_n_u64_f64   "fcvtzs" / "fcvtzu" with operand %2
     vcvtd_s64_f64 / vcvtd_u64_f64       "fcvtzs" / "fcvtzu"
   In the vcvtd_n_* macros the extra operand is written as plain "%2",
   without the leading '#' that the vector vcvt_n_* / vcvtq_n_* macros use.
   NOTE(review): vcvtd_f64_s64 and vcvtd_f64_u64 are declared to return
   int64_t / uint64_t although their names say f64, and vcvtd_s64_f64 /
   vcvtd_u64_f64 are declared to return float64_t although their names say
   s64 / u64 -- confirm the intended return types.  Operand lists, several
   local declarations and the returns are not present in this listing.  */
6159 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
6160 vcvtd_f64_s64 (int64_t a
)
6163 __asm__ ("scvtf %d0,%d1"
6166 : /* No clobbers */);
6170 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
6171 vcvtd_f64_u64 (uint64_t a
)
6174 __asm__ ("ucvtf %d0,%d1"
6177 : /* No clobbers */);
/* Macro forms taking an additional operand b (see header note on "%2").  */
6181 #define vcvtd_n_f64_s64(a, b) \
6186 __asm__ ("scvtf %d0,%d1,%2" \
6189 : /* No clobbers */); \
6193 #define vcvtd_n_f64_u64(a, b) \
6196 uint64_t a_ = (a); \
6198 __asm__ ("ucvtf %d0,%d1,%2" \
6201 : /* No clobbers */); \
6205 #define vcvtd_n_s64_f64(a, b) \
6208 float64_t a_ = (a); \
6210 __asm__ ("fcvtzs %d0,%d1,%2" \
6213 : /* No clobbers */); \
6217 #define vcvtd_n_u64_f64(a, b) \
6220 float64_t a_ = (a); \
6222 __asm__ ("fcvtzu %d0,%d1,%2" \
6225 : /* No clobbers */); \
6229 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6230 vcvtd_s64_f64 (float64_t a
)
6233 __asm__ ("fcvtzs %d0,%d1"
6236 : /* No clobbers */);
6240 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6241 vcvtd_u64_f64 (float64_t a
)
6244 __asm__ ("fcvtzu %d0,%d1"
6247 : /* No clobbers */);
/* vcvtm*: float-to-integer conversions built on "fcvtms" (signed result) and
   "fcvtmu" (unsigned result).  Layout mirrors the vcvta* family: 8-byte
   vector, scalar d, 16-byte vector, scalar s.
   NOTE(review): vcvtmd_* are declared to return float64_t, and
   vcvtms_s64_f64 / vcvtms_u64_f64 take and return float32_t with "%s"
   operands despite the 64-bit names -- confirm.  Operand lists, result
   declarations and returns are not present in this listing.  */
6251 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6252 vcvtm_s32_f32 (float32x2_t a
)
6255 __asm__ ("fcvtms %0.2s, %1.2s"
6258 : /* No clobbers */);
6262 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6263 vcvtm_u32_f32 (float32x2_t a
)
6266 __asm__ ("fcvtmu %0.2s, %1.2s"
6269 : /* No clobbers */);
6273 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6274 vcvtmd_s64_f64 (float64_t a
)
6277 __asm__ ("fcvtms %d0,%d1"
6280 : /* No clobbers */);
6284 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6285 vcvtmd_u64_f64 (float64_t a
)
6288 __asm__ ("fcvtmu %d0,%d1"
6291 : /* No clobbers */);
6295 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6296 vcvtmq_s32_f32 (float32x4_t a
)
6299 __asm__ ("fcvtms %0.4s, %1.4s"
6302 : /* No clobbers */);
6306 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6307 vcvtmq_s64_f64 (float64x2_t a
)
6310 __asm__ ("fcvtms %0.2d, %1.2d"
6313 : /* No clobbers */);
6317 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6318 vcvtmq_u32_f32 (float32x4_t a
)
6321 __asm__ ("fcvtmu %0.4s, %1.4s"
6324 : /* No clobbers */);
6328 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6329 vcvtmq_u64_f64 (float64x2_t a
)
6332 __asm__ ("fcvtmu %0.2d, %1.2d"
6335 : /* No clobbers */);
6339 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6340 vcvtms_s64_f64 (float32_t a
)
6343 __asm__ ("fcvtms %s0,%s1"
6346 : /* No clobbers */);
6350 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6351 vcvtms_u64_f64 (float32_t a
)
6354 __asm__ ("fcvtmu %s0,%s1"
6357 : /* No clobbers */);
/* vcvtn*: float-to-integer conversions built on "fcvtns" (signed result) and
   "fcvtnu" (unsigned result).  Layout mirrors the vcvta* / vcvtm* families:
   8-byte vector, scalar d, 16-byte vector, scalar s.
   NOTE(review): vcvtnd_* are declared to return float64_t, and
   vcvtns_s64_f64 / vcvtns_u64_f64 take and return float32_t with "%s"
   operands despite the 64-bit names -- confirm.  Operand lists, result
   declarations and returns are not present in this listing.  */
6361 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6362 vcvtn_s32_f32 (float32x2_t a
)
6365 __asm__ ("fcvtns %0.2s, %1.2s"
6368 : /* No clobbers */);
6372 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6373 vcvtn_u32_f32 (float32x2_t a
)
6376 __asm__ ("fcvtnu %0.2s, %1.2s"
6379 : /* No clobbers */);
6383 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6384 vcvtnd_s64_f64 (float64_t a
)
6387 __asm__ ("fcvtns %d0,%d1"
6390 : /* No clobbers */);
6394 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6395 vcvtnd_u64_f64 (float64_t a
)
6398 __asm__ ("fcvtnu %d0,%d1"
6401 : /* No clobbers */);
6405 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6406 vcvtnq_s32_f32 (float32x4_t a
)
6409 __asm__ ("fcvtns %0.4s, %1.4s"
6412 : /* No clobbers */);
6416 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6417 vcvtnq_s64_f64 (float64x2_t a
)
6420 __asm__ ("fcvtns %0.2d, %1.2d"
6423 : /* No clobbers */);
6427 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6428 vcvtnq_u32_f32 (float32x4_t a
)
6431 __asm__ ("fcvtnu %0.4s, %1.4s"
6434 : /* No clobbers */);
6438 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6439 vcvtnq_u64_f64 (float64x2_t a
)
6442 __asm__ ("fcvtnu %0.2d, %1.2d"
6445 : /* No clobbers */);
6449 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6450 vcvtns_s64_f64 (float32_t a
)
6453 __asm__ ("fcvtns %s0,%s1"
6456 : /* No clobbers */);
6460 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6461 vcvtns_u64_f64 (float32_t a
)
6464 __asm__ ("fcvtnu %s0,%s1"
6467 : /* No clobbers */);
/* vcvtp*: float-to-integer conversions built on "fcvtps" (signed result) and
   "fcvtpu" (unsigned result).  Layout mirrors the vcvta* / vcvtm* / vcvtn*
   families: 8-byte vector, scalar d, 16-byte vector, scalar s.
   NOTE(review): vcvtpd_* are declared to return float64_t, and
   vcvtps_s64_f64 / vcvtps_u64_f64 take and return float32_t with "%s"
   operands despite the 64-bit names -- confirm.  Operand lists, result
   declarations and returns are not present in this listing.  */
6471 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
6472 vcvtp_s32_f32 (float32x2_t a
)
6475 __asm__ ("fcvtps %0.2s, %1.2s"
6478 : /* No clobbers */);
6482 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
6483 vcvtp_u32_f32 (float32x2_t a
)
6486 __asm__ ("fcvtpu %0.2s, %1.2s"
6489 : /* No clobbers */);
6493 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6494 vcvtpd_s64_f64 (float64_t a
)
6497 __asm__ ("fcvtps %d0,%d1"
6500 : /* No clobbers */);
6504 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
6505 vcvtpd_u64_f64 (float64_t a
)
6508 __asm__ ("fcvtpu %d0,%d1"
6511 : /* No clobbers */);
6515 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6516 vcvtpq_s32_f32 (float32x4_t a
)
6519 __asm__ ("fcvtps %0.4s, %1.4s"
6522 : /* No clobbers */);
6526 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6527 vcvtpq_s64_f64 (float64x2_t a
)
6530 __asm__ ("fcvtps %0.2d, %1.2d"
6533 : /* No clobbers */);
6537 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6538 vcvtpq_u32_f32 (float32x4_t a
)
6541 __asm__ ("fcvtpu %0.4s, %1.4s"
6544 : /* No clobbers */);
6548 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6549 vcvtpq_u64_f64 (float64x2_t a
)
6552 __asm__ ("fcvtpu %0.2d, %1.2d"
6555 : /* No clobbers */);
6559 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6560 vcvtps_s64_f64 (float32_t a
)
6563 __asm__ ("fcvtps %s0,%s1"
6566 : /* No clobbers */);
6570 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6571 vcvtps_u64_f64 (float32_t a
)
6574 __asm__ ("fcvtpu %s0,%s1"
6577 : /* No clobbers */);
/* vcvtq_*: 16-byte-vector conversions between integer and floating point.
     vcvtq_f32_s32 / vcvtq_f64_s64   "scvtf"   on .4s / .2d
     vcvtq_f32_u32 / vcvtq_f64_u64   "ucvtf"   on .4s / .2d
     vcvtq_n_*                       the same four plus "fcvtzs" / "fcvtzu",
                                     each with an extra "#%2" immediate
     vcvtq_s32_f32 / vcvtq_s64_f64   "fcvtzs"  on .4s / .2d
     vcvtq_u32_f32 / vcvtq_u64_f64   "fcvtzu"  on .4s / .2d
   NOTE(review): operand lists, some result declarations, the macro
   statement-expression wrappers and the returns are not present in this
   listing -- confirm against the complete header.  */
6581 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6582 vcvtq_f32_s32 (int32x4_t a
)
6585 __asm__ ("scvtf %0.4s, %1.4s"
6588 : /* No clobbers */);
6592 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6593 vcvtq_f32_u32 (uint32x4_t a
)
6596 __asm__ ("ucvtf %0.4s, %1.4s"
6599 : /* No clobbers */);
6603 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6604 vcvtq_f64_s64 (int64x2_t a
)
6607 __asm__ ("scvtf %0.2d, %1.2d"
6610 : /* No clobbers */);
6614 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
6615 vcvtq_f64_u64 (uint64x2_t a
)
6618 __asm__ ("ucvtf %0.2d, %1.2d"
6621 : /* No clobbers */);
/* Macro forms: a is copied into a typed local a_; b is printed into the
   template as the immediate "#%2".  */
6625 #define vcvtq_n_f32_s32(a, b) \
6628 int32x4_t a_ = (a); \
6629 float32x4_t result; \
6630 __asm__ ("scvtf %0.4s, %1.4s, #%2" \
6633 : /* No clobbers */); \
6637 #define vcvtq_n_f32_u32(a, b) \
6640 uint32x4_t a_ = (a); \
6641 float32x4_t result; \
6642 __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
6645 : /* No clobbers */); \
6649 #define vcvtq_n_f64_s64(a, b) \
6652 int64x2_t a_ = (a); \
6653 float64x2_t result; \
6654 __asm__ ("scvtf %0.2d, %1.2d, #%2" \
6657 : /* No clobbers */); \
6661 #define vcvtq_n_f64_u64(a, b) \
6664 uint64x2_t a_ = (a); \
6665 float64x2_t result; \
6666 __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
6669 : /* No clobbers */); \
6673 #define vcvtq_n_s32_f32(a, b) \
6676 float32x4_t a_ = (a); \
6678 __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
6681 : /* No clobbers */); \
6685 #define vcvtq_n_s64_f64(a, b) \
6688 float64x2_t a_ = (a); \
6690 __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
6693 : /* No clobbers */); \
6697 #define vcvtq_n_u32_f32(a, b) \
6700 float32x4_t a_ = (a); \
6701 uint32x4_t result; \
6702 __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
6705 : /* No clobbers */); \
6709 #define vcvtq_n_u64_f64(a, b) \
6712 float64x2_t a_ = (a); \
6713 uint64x2_t result; \
6714 __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
6717 : /* No clobbers */); \
6721 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
6722 vcvtq_s32_f32 (float32x4_t a
)
6725 __asm__ ("fcvtzs %0.4s, %1.4s"
6728 : /* No clobbers */);
6732 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
6733 vcvtq_s64_f64 (float64x2_t a
)
6736 __asm__ ("fcvtzs %0.2d, %1.2d"
6739 : /* No clobbers */);
6743 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
6744 vcvtq_u32_f32 (float32x4_t a
)
6747 __asm__ ("fcvtzu %0.4s, %1.4s"
6750 : /* No clobbers */);
6754 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
6755 vcvtq_u64_f64 (float64x2_t a
)
6758 __asm__ ("fcvtzu %0.2d, %1.2d"
6761 : /* No clobbers */);
/* vcvts_*: scalar 32-bit conversions, all using the "%s" operand modifier.
     vcvts_f64_s32 / vcvts_f64_u32       "scvtf" / "ucvtf"
     vcvts_n_f32_s32 / vcvts_n_f32_u32   same, with an extra operand %2
     vcvts_n_s32_f32 / vcvts_n_u32_f32   "fcvtzs" / "fcvtzu" with operand %2
     vcvts_s64_f64 / vcvts_u64_f64       "fcvtzs" / "fcvtzu"
   As in the vcvtd_n_* macros, the extra operand is written as plain "%2"
   with no leading '#'.
   NOTE(review): several names and types disagree here and should be
   confirmed: vcvts_f64_s32 / vcvts_f64_u32 say f64 but operate on "%s"
   operands and are declared to return int32_t / uint32_t; vcvts_s64_f64 /
   vcvts_u64_f64 say 64-bit but take and return float32_t.  Operand lists,
   several local declarations and the returns are not present in this
   listing.  */
6765 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
6766 vcvts_f64_s32 (int32_t a
)
6769 __asm__ ("scvtf %s0,%s1"
6772 : /* No clobbers */);
6776 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
6777 vcvts_f64_u32 (uint32_t a
)
6780 __asm__ ("ucvtf %s0,%s1"
6783 : /* No clobbers */);
/* Macro forms taking an additional operand b (see header note on "%2").  */
6787 #define vcvts_n_f32_s32(a, b) \
6792 __asm__ ("scvtf %s0,%s1,%2" \
6795 : /* No clobbers */); \
6799 #define vcvts_n_f32_u32(a, b) \
6802 uint32_t a_ = (a); \
6804 __asm__ ("ucvtf %s0,%s1,%2" \
6807 : /* No clobbers */); \
6811 #define vcvts_n_s32_f32(a, b) \
6814 float32_t a_ = (a); \
6816 __asm__ ("fcvtzs %s0,%s1,%2" \
6819 : /* No clobbers */); \
6823 #define vcvts_n_u32_f32(a, b) \
6826 float32_t a_ = (a); \
6828 __asm__ ("fcvtzu %s0,%s1,%2" \
6831 : /* No clobbers */); \
6835 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6836 vcvts_s64_f64 (float32_t a
)
6839 __asm__ ("fcvtzs %s0,%s1"
6842 : /* No clobbers */);
6846 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6847 vcvts_u64_f64 (float32_t a
)
6850 __asm__ ("fcvtzu %s0,%s1"
6853 : /* No clobbers */);
/* vcvtx*: double-to-float narrowing built on "fcvtxn" / "fcvtxn2".
     vcvtx_f32_f64       "fcvtxn"   .2d -> .2s
     vcvtx_high_f32_f64  "fcvtxn2"  .2d -> .4s
     vcvtxd_f32_f64      "fcvtxn"   scalar, %d1 -> %s0
   NOTE(review): operand lists, result declarations and returns are not
   present in this listing -- confirm against the complete header.  */
6857 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
6858 vcvtx_f32_f64 (float64x2_t a
)
6861 __asm__ ("fcvtxn %0.2s,%1.2d"
6864 : /* No clobbers */);
/* NOTE(review): unlike vcvt_high_f32_f64, this "high" variant takes only the
   float64x2_t source and the template reads %1, so no value for the other
   half of the float32x4_t result is supplied by the caller -- presumably
   that half is left unspecified; confirm against the ISA description of
   fcvtxn2 and the intended signature.  */
6868 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
6869 vcvtx_high_f32_f64 (float64x2_t a
)
6872 __asm__ ("fcvtxn2 %0.4s,%1.2d"
6875 : /* No clobbers */);
6879 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
6880 vcvtxd_f32_f64 (float64_t a
)
6883 __asm__ ("fcvtxn %s0,%d1"
6886 : /* No clobbers */);
/* vdup_lane_<type>(a, b): macros that copy A into a typed 64-bit vector
   local a_ and run one instruction whose template indexes a lane of a_
   with operand %2.  The 8/16/32-bit element forms use DUP on the .8b,
   .4h or .2s arrangement; the s64/u64 forms use INS into %0.d[0]
   instead.
   NOTE(review): %2 is presumably the lane number B; the operand lists
   are not visible in this excerpt, and for several variants the result
   declaration is not visible either -- confirm.  */
6890 #define vdup_lane_f32(a, b) \
6893 float32x2_t a_ = (a); \
6894 float32x2_t result; \
6895 __asm__ ("dup %0.2s,%1.s[%2]" \
6898 : /* No clobbers */); \
6902 #define vdup_lane_p8(a, b) \
6905 poly8x8_t a_ = (a); \
6907 __asm__ ("dup %0.8b,%1.b[%2]" \
6910 : /* No clobbers */); \
6914 #define vdup_lane_p16(a, b) \
6917 poly16x4_t a_ = (a); \
6918 poly16x4_t result; \
6919 __asm__ ("dup %0.4h,%1.h[%2]" \
6922 : /* No clobbers */); \
6926 #define vdup_lane_s8(a, b) \
6929 int8x8_t a_ = (a); \
6931 __asm__ ("dup %0.8b,%1.b[%2]" \
6934 : /* No clobbers */); \
6938 #define vdup_lane_s16(a, b) \
6941 int16x4_t a_ = (a); \
6943 __asm__ ("dup %0.4h,%1.h[%2]" \
6946 : /* No clobbers */); \
6950 #define vdup_lane_s32(a, b) \
6953 int32x2_t a_ = (a); \
6955 __asm__ ("dup %0.2s,%1.s[%2]" \
6958 : /* No clobbers */); \
6962 #define vdup_lane_s64(a, b) \
6965 int64x1_t a_ = (a); \
6967 __asm__ ("ins %0.d[0],%1.d[%2]" \
6970 : /* No clobbers */); \
6974 #define vdup_lane_u8(a, b) \
6977 uint8x8_t a_ = (a); \
6979 __asm__ ("dup %0.8b,%1.b[%2]" \
6982 : /* No clobbers */); \
6986 #define vdup_lane_u16(a, b) \
6989 uint16x4_t a_ = (a); \
6990 uint16x4_t result; \
6991 __asm__ ("dup %0.4h,%1.h[%2]" \
6994 : /* No clobbers */); \
6998 #define vdup_lane_u32(a, b) \
7001 uint32x2_t a_ = (a); \
7002 uint32x2_t result; \
7003 __asm__ ("dup %0.2s,%1.s[%2]" \
7006 : /* No clobbers */); \
7010 #define vdup_lane_u64(a, b) \
7013 uint64x1_t a_ = (a); \
7014 uint64x1_t result; \
7015 __asm__ ("ins %0.d[0],%1.d[%2]" \
7018 : /* No clobbers */); \
/* vdup_n_<type>: always-inline wrappers that take one scalar and are
   declared to return a 64-bit vector type.  The 8/16/32-bit element
   forms use "dup %0.<arrangement>, %w1"; the s64/u64 forms use
   "ins %0.d[0], %x1".  Note the scalar parameter types: the p8/p16/u8/
   u16/u32 forms take uint32_t and the s8/s16/s32 forms take int32_t,
   i.e. the narrow variants accept a 32-bit scalar rather than the
   element type.
   NOTE(review): vdup_n_f32 takes a float32_t but uses the same %w1
   operand form as the integer variants; the operand constraints are not
   visible in this excerpt, so how the float reaches that operand cannot
   be confirmed here.  */
7022 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7023 vdup_n_f32 (float32_t a
)
7026 __asm__ ("dup %0.2s, %w1"
7029 : /* No clobbers */);
7033 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
7034 vdup_n_p8 (uint32_t a
)
7037 __asm__ ("dup %0.8b,%w1"
7040 : /* No clobbers */);
7044 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
7045 vdup_n_p16 (uint32_t a
)
7048 __asm__ ("dup %0.4h,%w1"
7051 : /* No clobbers */);
7055 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
7056 vdup_n_s8 (int32_t a
)
7059 __asm__ ("dup %0.8b,%w1"
7062 : /* No clobbers */);
7066 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
7067 vdup_n_s16 (int32_t a
)
7070 __asm__ ("dup %0.4h,%w1"
7073 : /* No clobbers */);
7077 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
7078 vdup_n_s32 (int32_t a
)
7081 __asm__ ("dup %0.2s,%w1"
7084 : /* No clobbers */);
7088 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
7089 vdup_n_s64 (int64_t a
)
7092 __asm__ ("ins %0.d[0],%x1"
7095 : /* No clobbers */);
7099 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
7100 vdup_n_u8 (uint32_t a
)
7103 __asm__ ("dup %0.8b,%w1"
7106 : /* No clobbers */);
7110 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
7111 vdup_n_u16 (uint32_t a
)
7114 __asm__ ("dup %0.4h,%w1"
7117 : /* No clobbers */);
7121 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
7122 vdup_n_u32 (uint32_t a
)
7125 __asm__ ("dup %0.2s,%w1"
7128 : /* No clobbers */);
7132 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
7133 vdup_n_u64 (uint64_t a
)
7136 __asm__ ("ins %0.d[0],%x1"
7139 : /* No clobbers */);
/* vdupd_lane_f64(a, b): macro that copies A into a float64x2_t local a_
   and issues "dup %d0, %1.d[%2]", i.e. a scalar D-register destination
   taken from a .d lane of a_.
   NOTE(review): %2 is presumably the lane number B; the operand lists
   and the result declaration are not visible in this excerpt.  */
7143 #define vdupd_lane_f64(a, b) \
7146 float64x2_t a_ = (a); \
7148 __asm__ ("dup %d0, %1.d[%2]" \
7151 : /* No clobbers */); \
/* vdupq_lane_<type>(a, b): macros that copy A into a typed 64-bit vector
   local a_ and issue DUP with a 128-bit destination arrangement (.16b,
   .8h, .4s or .2d) sourced from lane %2 of a_.  Where the declaration is
   visible, the result local has the matching 128-bit vector type.
   NOTE(review): %2 is presumably the lane number B; the operand lists
   (and, for the s8/s16/s32/s64 forms, the result declarations) are not
   visible in this excerpt -- confirm.  */
7155 #define vdupq_lane_f32(a, b) \
7158 float32x2_t a_ = (a); \
7159 float32x4_t result; \
7160 __asm__ ("dup %0.4s,%1.s[%2]" \
7163 : /* No clobbers */); \
7167 #define vdupq_lane_f64(a, b) \
7170 float64x1_t a_ = (a); \
7171 float64x2_t result; \
7172 __asm__ ("dup %0.2d,%1.d[%2]" \
7175 : /* No clobbers */); \
7179 #define vdupq_lane_p8(a, b) \
7182 poly8x8_t a_ = (a); \
7183 poly8x16_t result; \
7184 __asm__ ("dup %0.16b,%1.b[%2]" \
7187 : /* No clobbers */); \
7191 #define vdupq_lane_p16(a, b) \
7194 poly16x4_t a_ = (a); \
7195 poly16x8_t result; \
7196 __asm__ ("dup %0.8h,%1.h[%2]" \
7199 : /* No clobbers */); \
7203 #define vdupq_lane_s8(a, b) \
7206 int8x8_t a_ = (a); \
7208 __asm__ ("dup %0.16b,%1.b[%2]" \
7211 : /* No clobbers */); \
7215 #define vdupq_lane_s16(a, b) \
7218 int16x4_t a_ = (a); \
7220 __asm__ ("dup %0.8h,%1.h[%2]" \
7223 : /* No clobbers */); \
7227 #define vdupq_lane_s32(a, b) \
7230 int32x2_t a_ = (a); \
7232 __asm__ ("dup %0.4s,%1.s[%2]" \
7235 : /* No clobbers */); \
7239 #define vdupq_lane_s64(a, b) \
7242 int64x1_t a_ = (a); \
7244 __asm__ ("dup %0.2d,%1.d[%2]" \
7247 : /* No clobbers */); \
7251 #define vdupq_lane_u8(a, b) \
7254 uint8x8_t a_ = (a); \
7255 uint8x16_t result; \
7256 __asm__ ("dup %0.16b,%1.b[%2]" \
7259 : /* No clobbers */); \
7263 #define vdupq_lane_u16(a, b) \
7266 uint16x4_t a_ = (a); \
7267 uint16x8_t result; \
7268 __asm__ ("dup %0.8h,%1.h[%2]" \
7271 : /* No clobbers */); \
7275 #define vdupq_lane_u32(a, b) \
7278 uint32x2_t a_ = (a); \
7279 uint32x4_t result; \
7280 __asm__ ("dup %0.4s,%1.s[%2]" \
7283 : /* No clobbers */); \
7287 #define vdupq_lane_u64(a, b) \
7290 uint64x1_t a_ = (a); \
7291 uint64x2_t result; \
7292 __asm__ ("dup %0.2d,%1.d[%2]" \
7295 : /* No clobbers */); \
/* vdupq_n_<type>: always-inline wrappers that take one scalar and are
   declared to return a 128-bit vector type.  Each issues one DUP with a
   .16b/.8h/.4s destination fed from %w1, or a .2d destination fed from
   %x1.  As in the 64-bit family, the narrow integer/poly variants take a
   32-bit scalar (uint32_t or int32_t) rather than the element type.
   NOTE(review): vdupq_n_f32 and vdupq_n_f64 take floating-point scalars
   but use the %w1 / %x1 operand forms; the operand constraints are not
   visible in this excerpt -- confirm how the value is passed.  */
7299 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
7300 vdupq_n_f32 (float32_t a
)
7303 __asm__ ("dup %0.4s, %w1"
7306 : /* No clobbers */);
7310 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
7311 vdupq_n_f64 (float64_t a
)
7314 __asm__ ("dup %0.2d, %x1"
7317 : /* No clobbers */);
7321 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
7322 vdupq_n_p8 (uint32_t a
)
7325 __asm__ ("dup %0.16b,%w1"
7328 : /* No clobbers */);
7332 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
7333 vdupq_n_p16 (uint32_t a
)
7336 __asm__ ("dup %0.8h,%w1"
7339 : /* No clobbers */);
7343 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
7344 vdupq_n_s8 (int32_t a
)
7347 __asm__ ("dup %0.16b,%w1"
7350 : /* No clobbers */);
7354 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
7355 vdupq_n_s16 (int32_t a
)
7358 __asm__ ("dup %0.8h,%w1"
7361 : /* No clobbers */);
7365 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
7366 vdupq_n_s32 (int32_t a
)
7369 __asm__ ("dup %0.4s,%w1"
7372 : /* No clobbers */);
7376 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
7377 vdupq_n_s64 (int64_t a
)
7380 __asm__ ("dup %0.2d,%x1"
7383 : /* No clobbers */);
7387 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
7388 vdupq_n_u8 (uint32_t a
)
7391 __asm__ ("dup %0.16b,%w1"
7394 : /* No clobbers */);
7398 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
7399 vdupq_n_u16 (uint32_t a
)
7402 __asm__ ("dup %0.8h,%w1"
7405 : /* No clobbers */);
7409 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
7410 vdupq_n_u32 (uint32_t a
)
7413 __asm__ ("dup %0.4s,%w1"
7416 : /* No clobbers */);
7420 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
7421 vdupq_n_u64 (uint64_t a
)
7424 __asm__ ("dup %0.2d,%x1"
7427 : /* No clobbers */);
/* vdups_lane_f32(a, b): macro that copies A into a float32x4_t local a_
   and issues "dup %s0, %1.s[%2]", i.e. a scalar S-register destination
   taken from a .s lane of a_.
   NOTE(review): %2 is presumably the lane number B; the operand lists
   and the result declaration are not visible in this excerpt.  */
7431 #define vdups_lane_f32(a, b) \
7434 float32x4_t a_ = (a); \
7436 __asm__ ("dup %s0, %1.s[%2]" \
7439 : /* No clobbers */); \
/* vext_<type>(a, b, c): macros over 64-bit vectors.  A and B are copied
   into typed locals a_ and b_ and passed as "w" inputs (%1, %2); C is
   passed with the "i" constraint (%3), so it has to be a compile-time
   constant.  Every variant issues EXT on the .8b arrangement; the
   template scales the index to a byte offset by writing "#%3*2", "#%3*4"
   or "#%3*8" for 16-, 32- and 64-bit elements, and uses %3 unscaled for
   8-bit elements.
   NOTE(review): the 8-bit variants (p8/s8/u8) write the immediate as
   "%3" without the leading '#' that every other variant here uses --
   confirm this is intentional.  The output operand line and the
   statement-expression wrapper are not visible in this excerpt.  */
7443 #define vext_f32(a, b, c) \
7446 float32x2_t b_ = (b); \
7447 float32x2_t a_ = (a); \
7448 float32x2_t result; \
7449 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
7451 : "w"(a_), "w"(b_), "i"(c) \
7452 : /* No clobbers */); \
7456 #define vext_f64(a, b, c) \
7459 float64x1_t b_ = (b); \
7460 float64x1_t a_ = (a); \
7461 float64x1_t result; \
7462 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
7464 : "w"(a_), "w"(b_), "i"(c) \
7465 : /* No clobbers */); \
7469 #define vext_p8(a, b, c) \
7472 poly8x8_t b_ = (b); \
7473 poly8x8_t a_ = (a); \
7475 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
7477 : "w"(a_), "w"(b_), "i"(c) \
7478 : /* No clobbers */); \
7482 #define vext_p16(a, b, c) \
7485 poly16x4_t b_ = (b); \
7486 poly16x4_t a_ = (a); \
7487 poly16x4_t result; \
7488 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
7490 : "w"(a_), "w"(b_), "i"(c) \
7491 : /* No clobbers */); \
7495 #define vext_s8(a, b, c) \
7498 int8x8_t b_ = (b); \
7499 int8x8_t a_ = (a); \
7501 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
7503 : "w"(a_), "w"(b_), "i"(c) \
7504 : /* No clobbers */); \
7508 #define vext_s16(a, b, c) \
7511 int16x4_t b_ = (b); \
7512 int16x4_t a_ = (a); \
7514 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
7516 : "w"(a_), "w"(b_), "i"(c) \
7517 : /* No clobbers */); \
7521 #define vext_s32(a, b, c) \
7524 int32x2_t b_ = (b); \
7525 int32x2_t a_ = (a); \
7527 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
7529 : "w"(a_), "w"(b_), "i"(c) \
7530 : /* No clobbers */); \
7534 #define vext_s64(a, b, c) \
7537 int64x1_t b_ = (b); \
7538 int64x1_t a_ = (a); \
7540 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
7542 : "w"(a_), "w"(b_), "i"(c) \
7543 : /* No clobbers */); \
7547 #define vext_u8(a, b, c) \
7550 uint8x8_t b_ = (b); \
7551 uint8x8_t a_ = (a); \
7553 __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
7555 : "w"(a_), "w"(b_), "i"(c) \
7556 : /* No clobbers */); \
7560 #define vext_u16(a, b, c) \
7563 uint16x4_t b_ = (b); \
7564 uint16x4_t a_ = (a); \
7565 uint16x4_t result; \
7566 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
7568 : "w"(a_), "w"(b_), "i"(c) \
7569 : /* No clobbers */); \
7573 #define vext_u32(a, b, c) \
7576 uint32x2_t b_ = (b); \
7577 uint32x2_t a_ = (a); \
7578 uint32x2_t result; \
7579 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
7581 : "w"(a_), "w"(b_), "i"(c) \
7582 : /* No clobbers */); \
7586 #define vext_u64(a, b, c) \
7589 uint64x1_t b_ = (b); \
7590 uint64x1_t a_ = (a); \
7591 uint64x1_t result; \
7592 __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
7594 : "w"(a_), "w"(b_), "i"(c) \
7595 : /* No clobbers */); \
/* vextq_<type>(a, b, c): 128-bit counterparts of the vext_* macros.  A
   and B are copied into typed locals a_ and b_ and passed as "w" inputs
   (%1, %2); C is passed with the "i" constraint (%3), so it has to be a
   compile-time constant.  Every variant issues EXT on the .16b
   arrangement with the index scaled to bytes in the template: "#%3" for
   8-bit elements, "#%3*2", "#%3*4" and "#%3*8" for wider ones.
   NOTE(review): the output operand line and the statement-expression
   wrapper are not visible in this excerpt; for the s8/s16/s32/s64
   variants the result declaration is not visible either.  */
7599 #define vextq_f32(a, b, c) \
7602 float32x4_t b_ = (b); \
7603 float32x4_t a_ = (a); \
7604 float32x4_t result; \
7605 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
7607 : "w"(a_), "w"(b_), "i"(c) \
7608 : /* No clobbers */); \
7612 #define vextq_f64(a, b, c) \
7615 float64x2_t b_ = (b); \
7616 float64x2_t a_ = (a); \
7617 float64x2_t result; \
7618 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
7620 : "w"(a_), "w"(b_), "i"(c) \
7621 : /* No clobbers */); \
7625 #define vextq_p8(a, b, c) \
7628 poly8x16_t b_ = (b); \
7629 poly8x16_t a_ = (a); \
7630 poly8x16_t result; \
7631 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
7633 : "w"(a_), "w"(b_), "i"(c) \
7634 : /* No clobbers */); \
7638 #define vextq_p16(a, b, c) \
7641 poly16x8_t b_ = (b); \
7642 poly16x8_t a_ = (a); \
7643 poly16x8_t result; \
7644 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
7646 : "w"(a_), "w"(b_), "i"(c) \
7647 : /* No clobbers */); \
7651 #define vextq_s8(a, b, c) \
7654 int8x16_t b_ = (b); \
7655 int8x16_t a_ = (a); \
7657 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
7659 : "w"(a_), "w"(b_), "i"(c) \
7660 : /* No clobbers */); \
7664 #define vextq_s16(a, b, c) \
7667 int16x8_t b_ = (b); \
7668 int16x8_t a_ = (a); \
7670 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
7672 : "w"(a_), "w"(b_), "i"(c) \
7673 : /* No clobbers */); \
7677 #define vextq_s32(a, b, c) \
7680 int32x4_t b_ = (b); \
7681 int32x4_t a_ = (a); \
7683 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
7685 : "w"(a_), "w"(b_), "i"(c) \
7686 : /* No clobbers */); \
7690 #define vextq_s64(a, b, c) \
7693 int64x2_t b_ = (b); \
7694 int64x2_t a_ = (a); \
7696 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
7698 : "w"(a_), "w"(b_), "i"(c) \
7699 : /* No clobbers */); \
7703 #define vextq_u8(a, b, c) \
7706 uint8x16_t b_ = (b); \
7707 uint8x16_t a_ = (a); \
7708 uint8x16_t result; \
7709 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
7711 : "w"(a_), "w"(b_), "i"(c) \
7712 : /* No clobbers */); \
7716 #define vextq_u16(a, b, c) \
7719 uint16x8_t b_ = (b); \
7720 uint16x8_t a_ = (a); \
7721 uint16x8_t result; \
7722 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
7724 : "w"(a_), "w"(b_), "i"(c) \
7725 : /* No clobbers */); \
7729 #define vextq_u32(a, b, c) \
7732 uint32x4_t b_ = (b); \
7733 uint32x4_t a_ = (a); \
7734 uint32x4_t result; \
7735 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
7737 : "w"(a_), "w"(b_), "i"(c) \
7738 : /* No clobbers */); \
7742 #define vextq_u64(a, b, c) \
7745 uint64x2_t b_ = (b); \
7746 uint64x2_t a_ = (a); \
7747 uint64x2_t result; \
7748 __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
7750 : "w"(a_), "w"(b_), "i"(c) \
7751 : /* No clobbers */); \
/* Fused multiply-add wrappers built on FMLA.
   - vfma_f32, vfmaq_f32, vfmaq_f64 (functions): inputs are "0"(a),
     "w"(b), "w"(c); the "0" matching constraint places accumulator A in
     the same register as output operand 0, and the template multiplies
     %2 by %3 into %0.
   - vfma_lane_f32, vfmaq_lane_f32, vfmaq_lane_f64 (macros, a/b/c/d): the
     same scheme with typed locals a_, b_, c_ and a fourth "i" operand D
     selecting the lane of c_ (%3.s[%4] / %3.d[%4]); D must therefore be
     a compile-time constant.
   - vfmad_lane_f64, vfmas_lane_f32 (macros, a/b/c): scalar forms using
     %d / %s registers, with inputs "w"(a_), "w"(b_), "i"(c).
   NOTE(review): in the two scalar macros the visible input list has no
   "0" matching constraint, so nothing visible ties an accumulator to
   %0 even though FMLA accumulates into its destination -- confirm how
   %0 is initialised.  Output operand lines are not visible in this
   excerpt.  */
7755 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7756 vfma_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
7759 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
7761 : "0"(a
), "w"(b
), "w"(c
)
7762 : /* No clobbers */);
7766 #define vfma_lane_f32(a, b, c, d) \
7769 float32x2_t c_ = (c); \
7770 float32x2_t b_ = (b); \
7771 float32x2_t a_ = (a); \
7772 float32x2_t result; \
7773 __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
7775 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7776 : /* No clobbers */); \
7780 #define vfmad_lane_f64(a, b, c) \
7783 float64x2_t b_ = (b); \
7784 float64_t a_ = (a); \
7786 __asm__ ("fmla %d0,%d1,%2.d[%3]" \
7788 : "w"(a_), "w"(b_), "i"(c) \
7789 : /* No clobbers */); \
7793 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
7794 vfmaq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
7797 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
7799 : "0"(a
), "w"(b
), "w"(c
)
7800 : /* No clobbers */);
7804 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
7805 vfmaq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
7808 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
7810 : "0"(a
), "w"(b
), "w"(c
)
7811 : /* No clobbers */);
7815 #define vfmaq_lane_f32(a, b, c, d) \
7818 float32x4_t c_ = (c); \
7819 float32x4_t b_ = (b); \
7820 float32x4_t a_ = (a); \
7821 float32x4_t result; \
7822 __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
7824 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7825 : /* No clobbers */); \
7829 #define vfmaq_lane_f64(a, b, c, d) \
7832 float64x2_t c_ = (c); \
7833 float64x2_t b_ = (b); \
7834 float64x2_t a_ = (a); \
7835 float64x2_t result; \
7836 __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
7838 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7839 : /* No clobbers */); \
7843 #define vfmas_lane_f32(a, b, c) \
7846 float32x4_t b_ = (b); \
7847 float32_t a_ = (a); \
7849 __asm__ ("fmla %s0,%s1,%2.s[%3]" \
7851 : "w"(a_), "w"(b_), "i"(c) \
7852 : /* No clobbers */); \
/* vfma_n_f32, vfmaq_n_f32, vfmaq_n_f64: FMLA by a scalar.  Accumulator A
   is tied to output operand 0 through the "0" matching constraint,
   vector B is operand %2, and scalar C is passed in a "w" register and
   referenced in the template as element 0 of that register (%3.s[0] or
   %3.d[0]).
   NOTE(review): the output operand line, result declaration and return
   statement are not visible in this excerpt.  */
7856 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7857 vfma_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
7860 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
7862 : "0"(a
), "w"(b
), "w"(c
)
7863 : /* No clobbers */);
7867 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
7868 vfmaq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
7871 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
7873 : "0"(a
), "w"(b
), "w"(c
)
7874 : /* No clobbers */);
7878 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
7879 vfmaq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
7882 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
7884 : "0"(a
), "w"(b
), "w"(c
)
7885 : /* No clobbers */);
/* Fused multiply-subtract wrappers built on FMLS.
   - vfms_f32, vfmsq_f32, vfmsq_f64 (functions): inputs are "0"(a),
     "w"(b), "w"(c); accumulator A shares a register with output operand
     0 and the template applies FMLS with %2 and %3.
   - vfmsd_lane_f64, vfmss_lane_f32 (macros, a/b/c): scalar forms on %d /
     %s registers with inputs "w"(a_), "w"(b_), "i"(c); C selects the
     lane of b_ and must be a compile-time constant.
   NOTE(review): the two scalar macros show no "0" matching constraint in
   their input lists, so nothing visible ties an accumulator to %0 --
   confirm how %0 is initialised.  Output operand lines are not visible
   in this excerpt.  */
7889 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7890 vfms_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
7893 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
7895 : "0"(a
), "w"(b
), "w"(c
)
7896 : /* No clobbers */);
7900 #define vfmsd_lane_f64(a, b, c) \
7903 float64x2_t b_ = (b); \
7904 float64_t a_ = (a); \
7906 __asm__ ("fmls %d0,%d1,%2.d[%3]" \
7908 : "w"(a_), "w"(b_), "i"(c) \
7909 : /* No clobbers */); \
7913 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
7914 vfmsq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
7917 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
7919 : "0"(a
), "w"(b
), "w"(c
)
7920 : /* No clobbers */);
7924 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
7925 vfmsq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
7928 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
7930 : "0"(a
), "w"(b
), "w"(c
)
7931 : /* No clobbers */);
7935 #define vfmss_lane_f32(a, b, c) \
7938 float32x4_t b_ = (b); \
7939 float32_t a_ = (a); \
7941 __asm__ ("fmls %s0,%s1,%2.s[%3]" \
7943 : "w"(a_), "w"(b_), "i"(c) \
7944 : /* No clobbers */); \
/* vget_high_<type>: always-inline wrappers that take a 128-bit vector and
   are declared to return the 64-bit vector type with the same element
   type.  Every variant uses the same template, "ins %0.d[0], %1.d[1]",
   which moves doubleword lane 1 of the source into doubleword lane 0 of
   the destination.
   NOTE(review): operand constraints, the result declaration and the
   return statement are not visible in this excerpt.  */
7948 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
7949 vget_high_f32 (float32x4_t a
)
7952 __asm__ ("ins %0.d[0], %1.d[1]"
7955 : /* No clobbers */);
7959 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
7960 vget_high_f64 (float64x2_t a
)
7963 __asm__ ("ins %0.d[0], %1.d[1]"
7966 : /* No clobbers */);
7970 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
7971 vget_high_p8 (poly8x16_t a
)
7974 __asm__ ("ins %0.d[0], %1.d[1]"
7977 : /* No clobbers */);
7981 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
7982 vget_high_p16 (poly16x8_t a
)
7985 __asm__ ("ins %0.d[0], %1.d[1]"
7988 : /* No clobbers */);
7992 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
7993 vget_high_s8 (int8x16_t a
)
7996 __asm__ ("ins %0.d[0], %1.d[1]"
7999 : /* No clobbers */);
8003 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8004 vget_high_s16 (int16x8_t a
)
8007 __asm__ ("ins %0.d[0], %1.d[1]"
8010 : /* No clobbers */);
8014 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8015 vget_high_s32 (int32x4_t a
)
8018 __asm__ ("ins %0.d[0], %1.d[1]"
8021 : /* No clobbers */);
8025 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
8026 vget_high_s64 (int64x2_t a
)
8029 __asm__ ("ins %0.d[0], %1.d[1]"
8032 : /* No clobbers */);
8036 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8037 vget_high_u8 (uint8x16_t a
)
8040 __asm__ ("ins %0.d[0], %1.d[1]"
8043 : /* No clobbers */);
8047 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8048 vget_high_u16 (uint16x8_t a
)
8051 __asm__ ("ins %0.d[0], %1.d[1]"
8054 : /* No clobbers */);
8058 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8059 vget_high_u32 (uint32x4_t a
)
8062 __asm__ ("ins %0.d[0], %1.d[1]"
8065 : /* No clobbers */);
8069 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
8070 vget_high_u64 (uint64x2_t a
)
8073 __asm__ ("ins %0.d[0], %1.d[1]"
8076 : /* No clobbers */);
/* vget_lane_f64(a, b): macro that copies A into a float64x1_t local a_
   and issues "umov %x0, %1.d[%2]", i.e. the destination is written in
   the %x (64-bit general-purpose register) operand form from a .d lane
   of a_.
   NOTE(review): %2 is presumably the lane number B; the operand lists
   and the type of the result are not visible in this excerpt, so how the
   general-register result becomes the macro's value cannot be confirmed
   here.  */
8080 #define vget_lane_f64(a, b) \
8083 float64x1_t a_ = (a); \
8085 __asm__ ("umov %x0, %1.d[%2]" \
8088 : /* No clobbers */); \
/* vget_low_<type>: always-inline wrappers that take a 128-bit vector and
   are declared to return the 64-bit vector type with the same element
   type.  Every variant uses the same template, "ins %0.d[0], %1.d[0]",
   which moves doubleword lane 0 of the source into doubleword lane 0 of
   the destination.
   NOTE(review): operand constraints, the result declaration and the
   return statement are not visible in this excerpt.  */
8092 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
8093 vget_low_f32 (float32x4_t a
)
8096 __asm__ ("ins %0.d[0], %1.d[0]"
8099 : /* No clobbers */);
8103 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
8104 vget_low_f64 (float64x2_t a
)
8107 __asm__ ("ins %0.d[0], %1.d[0]"
8110 : /* No clobbers */);
8114 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
8115 vget_low_p8 (poly8x16_t a
)
8118 __asm__ ("ins %0.d[0], %1.d[0]"
8121 : /* No clobbers */);
8125 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
8126 vget_low_p16 (poly16x8_t a
)
8129 __asm__ ("ins %0.d[0], %1.d[0]"
8132 : /* No clobbers */);
8136 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
8137 vget_low_s8 (int8x16_t a
)
8140 __asm__ ("ins %0.d[0], %1.d[0]"
8143 : /* No clobbers */);
8147 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8148 vget_low_s16 (int16x8_t a
)
8151 __asm__ ("ins %0.d[0], %1.d[0]"
8154 : /* No clobbers */);
8158 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8159 vget_low_s32 (int32x4_t a
)
8162 __asm__ ("ins %0.d[0], %1.d[0]"
8165 : /* No clobbers */);
8169 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
8170 vget_low_s64 (int64x2_t a
)
8173 __asm__ ("ins %0.d[0], %1.d[0]"
8176 : /* No clobbers */);
8180 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8181 vget_low_u8 (uint8x16_t a
)
8184 __asm__ ("ins %0.d[0], %1.d[0]"
8187 : /* No clobbers */);
8191 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8192 vget_low_u16 (uint16x8_t a
)
8195 __asm__ ("ins %0.d[0], %1.d[0]"
8198 : /* No clobbers */);
8202 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8203 vget_low_u32 (uint32x4_t a
)
8206 __asm__ ("ins %0.d[0], %1.d[0]"
8209 : /* No clobbers */);
8213 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
8214 vget_low_u64 (uint64x2_t a
)
8217 __asm__ ("ins %0.d[0], %1.d[0]"
8220 : /* No clobbers */);
/* vhsub_<type>: two-operand wrappers over 64-bit vectors.  The signed
   variants (s8, s16, s32) issue SHSUB and the unsigned variants (u8,
   u16, u32) issue UHSUB, on the .8b, .4h or .2s arrangement that matches
   the element width; the template reads %1 and %2 and writes %0.
   NOTE(review): %1 and %2 are presumably A and B in that order; the
   operand constraints, result declaration and return statement are not
   visible in this excerpt.  */
8224 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
8225 vhsub_s8 (int8x8_t a
, int8x8_t b
)
8228 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
8231 : /* No clobbers */);
8235 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8236 vhsub_s16 (int16x4_t a
, int16x4_t b
)
8239 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
8242 : /* No clobbers */);
8246 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8247 vhsub_s32 (int32x2_t a
, int32x2_t b
)
8250 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
8253 : /* No clobbers */);
8257 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8258 vhsub_u8 (uint8x8_t a
, uint8x8_t b
)
8261 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
8264 : /* No clobbers */);
8268 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8269 vhsub_u16 (uint16x4_t a
, uint16x4_t b
)
8272 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
8275 : /* No clobbers */);
8279 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8280 vhsub_u32 (uint32x2_t a
, uint32x2_t b
)
8283 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
8286 : /* No clobbers */);
/* vhsubq_<type>: 128-bit counterparts of vhsub_*.  The signed variants
   issue SHSUB and the unsigned variants issue UHSUB on the .16b, .8h or
   .4s arrangement that matches the element width; the template reads %1
   and %2 and writes %0.
   NOTE(review): %1 and %2 are presumably A and B in that order; the
   operand constraints, result declaration and return statement are not
   visible in this excerpt.  */
8290 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
8291 vhsubq_s8 (int8x16_t a
, int8x16_t b
)
8294 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
8297 : /* No clobbers */);
8301 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8302 vhsubq_s16 (int16x8_t a
, int16x8_t b
)
8305 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
8308 : /* No clobbers */);
8312 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8313 vhsubq_s32 (int32x4_t a
, int32x4_t b
)
8316 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
8319 : /* No clobbers */);
8323 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
8324 vhsubq_u8 (uint8x16_t a
, uint8x16_t b
)
8327 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
8330 : /* No clobbers */);
8334 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8335 vhsubq_u16 (uint16x8_t a
, uint16x8_t b
)
8338 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
8341 : /* No clobbers */);
8345 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8346 vhsubq_u32 (uint32x4_t a
, uint32x4_t b
)
8349 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
8352 : /* No clobbers */);
/* vld1_dup_<type>: always-inline wrappers that take a pointer to a const
   element and are declared to return the 64-bit vector type of that
   element.  Each issues one LD1R with the .8b, .4h, .2s or .1d
   arrangement matching the element width, with %1 as the memory operand.
   NOTE(review): %1 is presumably the element A points to; the operand
   constraints, result declaration and return statement are not visible
   in this excerpt.  */
8356 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
8357 vld1_dup_f32 (const float32_t
* a
)
8360 __asm__ ("ld1r {%0.2s}, %1"
8363 : /* No clobbers */);
8367 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
8368 vld1_dup_f64 (const float64_t
* a
)
8371 __asm__ ("ld1r {%0.1d}, %1"
8374 : /* No clobbers */);
8378 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
8379 vld1_dup_p8 (const poly8_t
* a
)
8382 __asm__ ("ld1r {%0.8b}, %1"
8385 : /* No clobbers */);
8389 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
8390 vld1_dup_p16 (const poly16_t
* a
)
8393 __asm__ ("ld1r {%0.4h}, %1"
8396 : /* No clobbers */);
8400 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
8401 vld1_dup_s8 (const int8_t * a
)
8404 __asm__ ("ld1r {%0.8b}, %1"
8407 : /* No clobbers */);
8411 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8412 vld1_dup_s16 (const int16_t * a
)
8415 __asm__ ("ld1r {%0.4h}, %1"
8418 : /* No clobbers */);
8422 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8423 vld1_dup_s32 (const int32_t * a
)
8426 __asm__ ("ld1r {%0.2s}, %1"
8429 : /* No clobbers */);
8433 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
8434 vld1_dup_s64 (const int64_t * a
)
8437 __asm__ ("ld1r {%0.1d}, %1"
8440 : /* No clobbers */);
8444 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8445 vld1_dup_u8 (const uint8_t * a
)
8448 __asm__ ("ld1r {%0.8b}, %1"
8451 : /* No clobbers */);
8455 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8456 vld1_dup_u16 (const uint16_t * a
)
8459 __asm__ ("ld1r {%0.4h}, %1"
8462 : /* No clobbers */);
8466 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8467 vld1_dup_u32 (const uint32_t * a
)
8470 __asm__ ("ld1r {%0.2s}, %1"
8473 : /* No clobbers */);
8477 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
8478 vld1_dup_u64 (const uint64_t * a
)
8481 __asm__ ("ld1r {%0.1d}, %1"
8484 : /* No clobbers */);
/* vld1_f32 / vld1_f64: wrappers that take a pointer to a const element
   and are declared to return float32x2_t / float64x1_t; each issues one
   LD1 ({%0.2s} / {%0.1d}) from memory operand %1.
   In vld1_f32 the memory operand is built by a statement expression that
   reinterprets A as a pointer to a whole float32x2_t and dereferences
   it, so the "Utv" operand covers the full vector rather than a single
   element.
   NOTE(review): the cast "(float32x2_t *) a" drops the const qualifier
   of A before the value is stored in a pointer-to-const; casting to
   "const float32x2_t *" would avoid discarding the qualifier.  For
   vld1_f64 the input operand line is not visible in this excerpt, and
   for both functions the output operand, result declaration and return
   statement are not shown.  */
8488 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
8489 vld1_f32 (const float32_t
* a
)
8492 __asm__ ("ld1 {%0.2s}, %1"
8494 : "Utv"(({const float32x2_t
*_a
= (float32x2_t
*) a
; *_a
;}))
8495 : /* No clobbers */);
8499 __extension__
static __inline float64x1_t
__attribute__ ((__always_inline__
))
8500 vld1_f64 (const float64_t
* a
)
8503 __asm__ ("ld1 {%0.1d}, %1"
8506 : /* No clobbers */);
/* vld1_lane_<type>(a, b, c): macros that load one element into a lane of
   a 64-bit vector.  A is copied into a pointer-to-const local a_ and B
   into a typed vector local b_.  The inputs are "i"(c) (%1, the lane
   number, which must be a compile-time constant), "Utv"(*a_) (%2, the
   element in memory) and "0"(b_), which places B in the same register as
   output operand 0 so the single-lane LD1 ({%0.b|h|s|d}[%1]) updates it
   in place.
   NOTE(review): the output operand line and the statement-expression
   wrapper are not visible in this excerpt; for several variants the
   result declaration is not visible either.  */
8510 #define vld1_lane_f32(a, b, c) \
8513 float32x2_t b_ = (b); \
8514 const float32_t * a_ = (a); \
8515 float32x2_t result; \
8516 __asm__ ("ld1 {%0.s}[%1], %2" \
8518 : "i" (c), "Utv"(*a_), "0"(b_) \
8519 : /* No clobbers */); \
8523 #define vld1_lane_f64(a, b, c) \
8526 float64x1_t b_ = (b); \
8527 const float64_t * a_ = (a); \
8528 float64x1_t result; \
8529 __asm__ ("ld1 {%0.d}[%1], %2" \
8531 : "i" (c), "Utv"(*a_), "0"(b_) \
8532 : /* No clobbers */); \
8536 #define vld1_lane_p8(a, b, c) \
8539 poly8x8_t b_ = (b); \
8540 const poly8_t * a_ = (a); \
8542 __asm__ ("ld1 {%0.b}[%1], %2" \
8544 : "i" (c), "Utv"(*a_), "0"(b_) \
8545 : /* No clobbers */); \
8549 #define vld1_lane_p16(a, b, c) \
8552 poly16x4_t b_ = (b); \
8553 const poly16_t * a_ = (a); \
8554 poly16x4_t result; \
8555 __asm__ ("ld1 {%0.h}[%1], %2" \
8557 : "i" (c), "Utv"(*a_), "0"(b_) \
8558 : /* No clobbers */); \
8562 #define vld1_lane_s8(a, b, c) \
8565 int8x8_t b_ = (b); \
8566 const int8_t * a_ = (a); \
8568 __asm__ ("ld1 {%0.b}[%1], %2" \
8570 : "i" (c), "Utv"(*a_), "0"(b_) \
8571 : /* No clobbers */); \
8575 #define vld1_lane_s16(a, b, c) \
8578 int16x4_t b_ = (b); \
8579 const int16_t * a_ = (a); \
8581 __asm__ ("ld1 {%0.h}[%1], %2" \
8583 : "i" (c), "Utv"(*a_), "0"(b_) \
8584 : /* No clobbers */); \
8588 #define vld1_lane_s32(a, b, c) \
8591 int32x2_t b_ = (b); \
8592 const int32_t * a_ = (a); \
8594 __asm__ ("ld1 {%0.s}[%1], %2" \
8596 : "i" (c), "Utv"(*a_), "0"(b_) \
8597 : /* No clobbers */); \
8601 #define vld1_lane_s64(a, b, c) \
8604 int64x1_t b_ = (b); \
8605 const int64_t * a_ = (a); \
8607 __asm__ ("ld1 {%0.d}[%1], %2" \
8609 : "i" (c), "Utv"(*a_), "0"(b_) \
8610 : /* No clobbers */); \
8614 #define vld1_lane_u8(a, b, c) \
8617 uint8x8_t b_ = (b); \
8618 const uint8_t * a_ = (a); \
8620 __asm__ ("ld1 {%0.b}[%1], %2" \
8622 : "i" (c), "Utv"(*a_), "0"(b_) \
8623 : /* No clobbers */); \
8627 #define vld1_lane_u16(a, b, c) \
8630 uint16x4_t b_ = (b); \
8631 const uint16_t * a_ = (a); \
8632 uint16x4_t result; \
8633 __asm__ ("ld1 {%0.h}[%1], %2" \
8635 : "i" (c), "Utv"(*a_), "0"(b_) \
8636 : /* No clobbers */); \
8640 #define vld1_lane_u32(a, b, c) \
8643 uint32x2_t b_ = (b); \
8644 const uint32_t * a_ = (a); \
8645 uint32x2_t result; \
8646 __asm__ ("ld1 {%0.s}[%1], %2" \
8648 : "i" (c), "Utv"(*a_), "0"(b_) \
8649 : /* No clobbers */); \
8653 #define vld1_lane_u64(a, b, c) \
8656 uint64x1_t b_ = (b); \
8657 const uint64_t * a_ = (a); \
8658 uint64x1_t result; \
8659 __asm__ ("ld1 {%0.d}[%1], %2" \
8661 : "i" (c), "Utv"(*a_), "0"(b_) \
8662 : /* No clobbers */); \
/* vld1_<type> for p8, p16, s8, s16, s32, s64, u8, u16, u32 and u64:
   wrappers that take a pointer to a const element and are declared to
   return the matching 64-bit vector type; each issues one LD1 with the
   .8b, .4h, .2s or .1d arrangement from memory operand %1.
   Except for the s64 and u64 variants (whose input operand line is not
   visible here), the memory operand is built by a statement expression
   that reinterprets A as a pointer to the whole vector type and
   dereferences it, so the "Utv" operand covers the full vector.
   NOTE(review): each of those casts, e.g. "(poly8x8_t *) a", drops the
   const qualifier of A before the value is stored in a pointer-to-const;
   casting to the const-qualified vector pointer type would avoid that.
   Output operands, result declarations and return statements are not
   visible in this excerpt.  */
8666 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
8667 vld1_p8 (const poly8_t
* a
)
8670 __asm__ ("ld1 {%0.8b}, %1"
8672 : "Utv"(({const poly8x8_t
*_a
= (poly8x8_t
*) a
; *_a
;}))
8673 : /* No clobbers */);
8677 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
8678 vld1_p16 (const poly16_t
* a
)
8681 __asm__ ("ld1 {%0.4h}, %1"
8683 : "Utv"(({const poly16x4_t
*_a
= (poly16x4_t
*) a
; *_a
;}))
8684 : /* No clobbers */);
8688 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
8689 vld1_s8 (const int8_t * a
)
8692 __asm__ ("ld1 {%0.8b}, %1"
8694 : "Utv"(({const int8x8_t
*_a
= (int8x8_t
*) a
; *_a
;}))
8695 : /* No clobbers */);
8699 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
8700 vld1_s16 (const int16_t * a
)
8703 __asm__ ("ld1 {%0.4h}, %1"
8705 : "Utv"(({const int16x4_t
*_a
= (int16x4_t
*) a
; *_a
;}))
8706 : /* No clobbers */);
8710 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
8711 vld1_s32 (const int32_t * a
)
8714 __asm__ ("ld1 {%0.2s}, %1"
8716 : "Utv"(({const int32x2_t
*_a
= (int32x2_t
*) a
; *_a
;}))
8717 : /* No clobbers */);
8721 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
8722 vld1_s64 (const int64_t * a
)
8725 __asm__ ("ld1 {%0.1d}, %1"
8728 : /* No clobbers */);
8732 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
8733 vld1_u8 (const uint8_t * a
)
8736 __asm__ ("ld1 {%0.8b}, %1"
8738 : "Utv"(({const uint8x8_t
*_a
= (uint8x8_t
*) a
; *_a
;}))
8739 : /* No clobbers */);
8743 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
8744 vld1_u16 (const uint16_t * a
)
8747 __asm__ ("ld1 {%0.4h}, %1"
8749 : "Utv"(({const uint16x4_t
*_a
= (uint16x4_t
*) a
; *_a
;}))
8750 : /* No clobbers */);
8754 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
8755 vld1_u32 (const uint32_t * a
)
8758 __asm__ ("ld1 {%0.2s}, %1"
8760 : "Utv"(({const uint32x2_t
*_a
= (uint32x2_t
*) a
; *_a
;}))
8761 : /* No clobbers */);
8765 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
8766 vld1_u64 (const uint64_t * a
)
8769 __asm__ ("ld1 {%0.1d}, %1"
8772 : /* No clobbers */);
8776 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
8777 vld1q_dup_f32 (const float32_t
* a
)
8780 __asm__ ("ld1r {%0.4s}, %1"
8783 : /* No clobbers */);
8787 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
8788 vld1q_dup_f64 (const float64_t
* a
)
8791 __asm__ ("ld1r {%0.2d}, %1"
8794 : /* No clobbers */);
8798 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
8799 vld1q_dup_p8 (const poly8_t
* a
)
8802 __asm__ ("ld1r {%0.16b}, %1"
8805 : /* No clobbers */);
8809 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
8810 vld1q_dup_p16 (const poly16_t
* a
)
8813 __asm__ ("ld1r {%0.8h}, %1"
8816 : /* No clobbers */);
8820 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
8821 vld1q_dup_s8 (const int8_t * a
)
8824 __asm__ ("ld1r {%0.16b}, %1"
8827 : /* No clobbers */);
/* vld1q_dup_s16: emits LD1R (load one element and replicate) with the .8h
   arrangement; takes a const int16_t pointer, declared to return int16x8_t.  */
8831 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
8832 vld1q_dup_s16 (const int16_t * a
)
8835 __asm__ ("ld1r {%0.8h}, %1"
8838 : /* No clobbers */);
/* vld1q_dup_s32: emits LD1R (load one element and replicate) with the .4s
   arrangement; takes a const int32_t pointer, declared to return int32x4_t.  */
8842 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
8843 vld1q_dup_s32 (const int32_t * a
)
8846 __asm__ ("ld1r {%0.4s}, %1"
8849 : /* No clobbers */);
/* vld1q_dup_s64: emits LD1R (load one element and replicate) with the .2d
   arrangement; takes a const int64_t pointer, declared to return int64x2_t.  */
8853 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
8854 vld1q_dup_s64 (const int64_t * a
)
8857 __asm__ ("ld1r {%0.2d}, %1"
8860 : /* No clobbers */);
/* vld1q_dup_u8: emits LD1R (load one element and replicate) with the .16b
   arrangement; takes a const uint8_t pointer, declared to return uint8x16_t.  */
8864 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
8865 vld1q_dup_u8 (const uint8_t * a
)
8868 __asm__ ("ld1r {%0.16b}, %1"
8871 : /* No clobbers */);
/* vld1q_dup_u16: emits LD1R (load one element and replicate) with the .8h
   arrangement; takes a const uint16_t pointer, declared to return uint16x8_t.  */
8875 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
8876 vld1q_dup_u16 (const uint16_t * a
)
8879 __asm__ ("ld1r {%0.8h}, %1"
8882 : /* No clobbers */);
/* vld1q_dup_u32: emits LD1R (load one element and replicate) with the .4s
   arrangement; takes a const uint32_t pointer, declared to return uint32x4_t.  */
8886 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
8887 vld1q_dup_u32 (const uint32_t * a
)
8890 __asm__ ("ld1r {%0.4s}, %1"
8893 : /* No clobbers */);
/* vld1q_dup_u64: emits LD1R (load one element and replicate) with the .2d
   arrangement; takes a const uint64_t pointer, declared to return uint64x2_t.  */
8897 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
8898 vld1q_dup_u64 (const uint64_t * a
)
8901 __asm__ ("ld1r {%0.2d}, %1"
8904 : /* No clobbers */);
/* vld1q_f32: emits LD1 with the .4s arrangement; the "Utv" memory operand is
   *a viewed through a const float32x4_t pointer.  Declared to return float32x4_t.  */
8908 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
8909 vld1q_f32 (const float32_t
* a
)
8912 __asm__ ("ld1 {%0.4s}, %1"
8914 : "Utv"(({const float32x4_t
*_a
= (float32x4_t
*) a
; *_a
;}))
8915 : /* No clobbers */);
/* vld1q_f64: emits LD1 with the .2d arrangement; the "Utv" memory operand is
   *a viewed through a const float64x2_t pointer.  Declared to return float64x2_t.  */
8919 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
8920 vld1q_f64 (const float64_t
* a
)
8923 __asm__ ("ld1 {%0.2d}, %1"
8925 : "Utv"(({const float64x2_t
*_a
= (float64x2_t
*) a
; *_a
;}))
8926 : /* No clobbers */);
/* vld1q_lane_f32 (a, b, c): single-lane LD1 into .s lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming float32x4_t vector b.  */
8930 #define vld1q_lane_f32(a, b, c) \
8933 float32x4_t b_ = (b); \
8934 const float32_t * a_ = (a); \
8935 float32x4_t result; \
8936 __asm__ ("ld1 {%0.s}[%1], %2" \
8938 : "i"(c), "Utv"(*a_), "0"(b_) \
8939 : /* No clobbers */); \
/* vld1q_lane_f64 (a, b, c): single-lane LD1 into .d lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming float64x2_t vector b.  */
8943 #define vld1q_lane_f64(a, b, c) \
8946 float64x2_t b_ = (b); \
8947 const float64_t * a_ = (a); \
8948 float64x2_t result; \
8949 __asm__ ("ld1 {%0.d}[%1], %2" \
8951 : "i"(c), "Utv"(*a_), "0"(b_) \
8952 : /* No clobbers */); \
/* vld1q_lane_p8 (a, b, c): single-lane LD1 into .b lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming poly8x16_t vector b.  */
8956 #define vld1q_lane_p8(a, b, c) \
8959 poly8x16_t b_ = (b); \
8960 const poly8_t * a_ = (a); \
8961 poly8x16_t result; \
8962 __asm__ ("ld1 {%0.b}[%1], %2" \
8964 : "i"(c), "Utv"(*a_), "0"(b_) \
8965 : /* No clobbers */); \
/* vld1q_lane_p16 (a, b, c): single-lane LD1 into .h lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming poly16x8_t vector b.  */
8969 #define vld1q_lane_p16(a, b, c) \
8972 poly16x8_t b_ = (b); \
8973 const poly16_t * a_ = (a); \
8974 poly16x8_t result; \
8975 __asm__ ("ld1 {%0.h}[%1], %2" \
8977 : "i"(c), "Utv"(*a_), "0"(b_) \
8978 : /* No clobbers */); \
/* vld1q_lane_s8 (a, b, c): single-lane LD1 into .b lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming int8x16_t vector b.  */
8982 #define vld1q_lane_s8(a, b, c) \
8985 int8x16_t b_ = (b); \
8986 const int8_t * a_ = (a); \
8988 __asm__ ("ld1 {%0.b}[%1], %2" \
8990 : "i"(c), "Utv"(*a_), "0"(b_) \
8991 : /* No clobbers */); \
/* vld1q_lane_s16 (a, b, c): single-lane LD1 into .h lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming int16x8_t vector b.  */
8995 #define vld1q_lane_s16(a, b, c) \
8998 int16x8_t b_ = (b); \
8999 const int16_t * a_ = (a); \
9001 __asm__ ("ld1 {%0.h}[%1], %2" \
9003 : "i"(c), "Utv"(*a_), "0"(b_) \
9004 : /* No clobbers */); \
/* vld1q_lane_s32 (a, b, c): single-lane LD1 into .s lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming int32x4_t vector b.  */
9008 #define vld1q_lane_s32(a, b, c) \
9011 int32x4_t b_ = (b); \
9012 const int32_t * a_ = (a); \
9014 __asm__ ("ld1 {%0.s}[%1], %2" \
9016 : "i"(c), "Utv"(*a_), "0"(b_) \
9017 : /* No clobbers */); \
/* vld1q_lane_s64 (a, b, c): single-lane LD1 into .d lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming int64x2_t vector b.  */
9021 #define vld1q_lane_s64(a, b, c) \
9024 int64x2_t b_ = (b); \
9025 const int64_t * a_ = (a); \
9027 __asm__ ("ld1 {%0.d}[%1], %2" \
9029 : "i"(c), "Utv"(*a_), "0"(b_) \
9030 : /* No clobbers */); \
/* vld1q_lane_u8 (a, b, c): single-lane LD1 into .b lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming uint8x16_t vector b.  */
9034 #define vld1q_lane_u8(a, b, c) \
9037 uint8x16_t b_ = (b); \
9038 const uint8_t * a_ = (a); \
9039 uint8x16_t result; \
9040 __asm__ ("ld1 {%0.b}[%1], %2" \
9042 : "i"(c), "Utv"(*a_), "0"(b_) \
9043 : /* No clobbers */); \
/* vld1q_lane_u16 (a, b, c): single-lane LD1 into .h lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming uint16x8_t vector b.  */
9047 #define vld1q_lane_u16(a, b, c) \
9050 uint16x8_t b_ = (b); \
9051 const uint16_t * a_ = (a); \
9052 uint16x8_t result; \
9053 __asm__ ("ld1 {%0.h}[%1], %2" \
9055 : "i"(c), "Utv"(*a_), "0"(b_) \
9056 : /* No clobbers */); \
/* vld1q_lane_u32 (a, b, c): single-lane LD1 into .s lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming uint32x4_t vector b.  */
9060 #define vld1q_lane_u32(a, b, c) \
9063 uint32x4_t b_ = (b); \
9064 const uint32_t * a_ = (a); \
9065 uint32x4_t result; \
9066 __asm__ ("ld1 {%0.s}[%1], %2" \
9068 : "i"(c), "Utv"(*a_), "0"(b_) \
9069 : /* No clobbers */); \
/* vld1q_lane_u64 (a, b, c): single-lane LD1 into .d lane c (an "i" immediate)
   from *a, with operand 0 tied to the incoming uint64x2_t vector b.  */
9073 #define vld1q_lane_u64(a, b, c) \
9076 uint64x2_t b_ = (b); \
9077 const uint64_t * a_ = (a); \
9078 uint64x2_t result; \
9079 __asm__ ("ld1 {%0.d}[%1], %2" \
9081 : "i"(c), "Utv"(*a_), "0"(b_) \
9082 : /* No clobbers */); \
/* vld1q_p8: emits LD1 with the .16b arrangement; the "Utv" memory operand is
   *a viewed through a const poly8x16_t pointer.  Declared to return poly8x16_t.  */
9086 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
9087 vld1q_p8 (const poly8_t
* a
)
9090 __asm__ ("ld1 {%0.16b}, %1"
9092 : "Utv"(({const poly8x16_t
*_a
= (poly8x16_t
*) a
; *_a
;}))
9093 : /* No clobbers */);
/* vld1q_p16: emits LD1 for eight 16-bit polynomial lanes; the "Utv" memory
   operand is *a viewed through a const poly16x8_t pointer.  Declared to
   return poly16x8_t.
   The arrangement is .8h, matching vld1q_s16 and vld1q_u16: loading halfword
   data with .16b treats it as bytes, which only yields the same lane
   contents when the data is little-endian.  */
9097 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
9098 vld1q_p16 (const poly16_t
* a
)
9101 __asm__ ("ld1 {%0.8h}, %1"
9103 : "Utv"(({const poly16x8_t
*_a
= (poly16x8_t
*) a
; *_a
;}))
9104 : /* No clobbers */);
/* vld1q_s8: emits LD1 with the .16b arrangement; the "Utv" memory operand is
   *a viewed through a const int8x16_t pointer.  Declared to return int8x16_t.  */
9108 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
9109 vld1q_s8 (const int8_t * a
)
9112 __asm__ ("ld1 {%0.16b}, %1"
9114 : "Utv"(({const int8x16_t
*_a
= (int8x16_t
*) a
; *_a
;}))
9115 : /* No clobbers */);
/* vld1q_s16: emits LD1 with the .8h arrangement; the "Utv" memory operand is
   *a viewed through a const int16x8_t pointer.  Declared to return int16x8_t.  */
9119 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9120 vld1q_s16 (const int16_t * a
)
9123 __asm__ ("ld1 {%0.8h}, %1"
9125 : "Utv"(({const int16x8_t
*_a
= (int16x8_t
*) a
; *_a
;}))
9126 : /* No clobbers */);
/* vld1q_s32: emits LD1 with the .4s arrangement; the "Utv" memory operand is
   *a viewed through a const int32x4_t pointer.  Declared to return int32x4_t.  */
9130 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9131 vld1q_s32 (const int32_t * a
)
9134 __asm__ ("ld1 {%0.4s}, %1"
9136 : "Utv"(({const int32x4_t
*_a
= (int32x4_t
*) a
; *_a
;}))
9137 : /* No clobbers */);
/* vld1q_s64: emits LD1 with the .2d arrangement; the "Utv" memory operand is
   *a viewed through a const int64x2_t pointer.  Declared to return int64x2_t.  */
9141 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9142 vld1q_s64 (const int64_t * a
)
9145 __asm__ ("ld1 {%0.2d}, %1"
9147 : "Utv"(({const int64x2_t
*_a
= (int64x2_t
*) a
; *_a
;}))
9148 : /* No clobbers */);
/* vld1q_u8: emits LD1 with the .16b arrangement; the "Utv" memory operand is
   *a viewed through a const uint8x16_t pointer.  Declared to return uint8x16_t.  */
9152 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
9153 vld1q_u8 (const uint8_t * a
)
9156 __asm__ ("ld1 {%0.16b}, %1"
9158 : "Utv"(({const uint8x16_t
*_a
= (uint8x16_t
*) a
; *_a
;}))
9159 : /* No clobbers */);
/* vld1q_u16: emits LD1 with the .8h arrangement; the "Utv" memory operand is
   *a viewed through a const uint16x8_t pointer.  Declared to return uint16x8_t.  */
9163 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9164 vld1q_u16 (const uint16_t * a
)
9167 __asm__ ("ld1 {%0.8h}, %1"
9169 : "Utv"(({const uint16x8_t
*_a
= (uint16x8_t
*) a
; *_a
;}))
9170 : /* No clobbers */);
/* vld1q_u32: emits LD1 with the .4s arrangement; the "Utv" memory operand is
   *a viewed through a const uint32x4_t pointer.  Declared to return uint32x4_t.  */
9174 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9175 vld1q_u32 (const uint32_t * a
)
9178 __asm__ ("ld1 {%0.4s}, %1"
9180 : "Utv"(({const uint32x4_t
*_a
= (uint32x4_t
*) a
; *_a
;}))
9181 : /* No clobbers */);
/* vld1q_u64: emits LD1 with the .2d arrangement; the "Utv" memory operand is
   *a viewed through a const uint64x2_t pointer.  Declared to return uint64x2_t.  */
9185 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9186 vld1q_u64 (const uint64_t * a
)
9189 __asm__ ("ld1 {%0.2d}, %1"
9191 : "Utv"(({const uint64x2_t
*_a
= (uint64x2_t
*) a
; *_a
;}))
9192 : /* No clobbers */);
/* vmaxnm_f32: emits FMAXNM on the .2s arrangement for float32x2_t a and b;
   declared to return float32x2_t.  */
9196 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
9197 vmaxnm_f32 (float32x2_t a
, float32x2_t b
)
9200 __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s"
9203 : /* No clobbers */);
/* vmaxnmq_f32: emits FMAXNM on the .4s arrangement for float32x4_t a and b;
   declared to return float32x4_t.  */
9207 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
9208 vmaxnmq_f32 (float32x4_t a
, float32x4_t b
)
9211 __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s"
9214 : /* No clobbers */);
/* vmaxnmq_f64: emits FMAXNM on the .2d arrangement for float64x2_t a and b;
   declared to return float64x2_t.  */
9218 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
9219 vmaxnmq_f64 (float64x2_t a
, float64x2_t b
)
9222 __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d"
9225 : /* No clobbers */);
/* vmaxnmvq_f32: emits the across-lanes FMAXNMV reduction of a (.4s) into an
   S-register scalar; declared to return float32_t.  */
9229 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
9230 vmaxnmvq_f32 (float32x4_t a
)
9233 __asm__ ("fmaxnmv %s0,%1.4s"
9236 : /* No clobbers */);
/* vmaxv_s8: emits the across-lanes SMAXV reduction of a (.8b) into a
   B-register scalar; declared to return int8_t.  */
9240 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
9241 vmaxv_s8 (int8x8_t a
)
9244 __asm__ ("smaxv %b0,%1.8b"
9247 : /* No clobbers */);
/* vmaxv_s16: emits the across-lanes SMAXV reduction of a (.4h) into an
   H-register scalar; declared to return int16_t.  */
9251 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
9252 vmaxv_s16 (int16x4_t a
)
9255 __asm__ ("smaxv %h0,%1.4h"
9258 : /* No clobbers */);
/* vmaxv_u8: emits the across-lanes UMAXV reduction of a (.8b) into a
   B-register scalar; declared to return uint8_t.  */
9262 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
9263 vmaxv_u8 (uint8x8_t a
)
9266 __asm__ ("umaxv %b0,%1.8b"
9269 : /* No clobbers */);
/* vmaxv_u16: emits the across-lanes UMAXV reduction of a (.4h) into an
   H-register scalar; declared to return uint16_t.  */
9273 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
9274 vmaxv_u16 (uint16x4_t a
)
9277 __asm__ ("umaxv %h0,%1.4h"
9280 : /* No clobbers */);
/* vmaxvq_f32: emits the across-lanes FMAXV reduction of a (.4s) into an
   S-register scalar; declared to return float32_t.  */
9284 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
9285 vmaxvq_f32 (float32x4_t a
)
9288 __asm__ ("fmaxv %s0,%1.4s"
9291 : /* No clobbers */);
/* vmaxvq_s8: emits the across-lanes SMAXV reduction of a (.16b) into a
   B-register scalar; declared to return int8_t.  */
9295 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
9296 vmaxvq_s8 (int8x16_t a
)
9299 __asm__ ("smaxv %b0,%1.16b"
9302 : /* No clobbers */);
/* vmaxvq_s16: emits the across-lanes SMAXV reduction of a (.8h) into an
   H-register scalar; declared to return int16_t.  */
9306 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
9307 vmaxvq_s16 (int16x8_t a
)
9310 __asm__ ("smaxv %h0,%1.8h"
9313 : /* No clobbers */);
/* vmaxvq_s32: emits the across-lanes SMAXV reduction of a (.4s) into an
   S-register scalar; declared to return int32_t.  */
9317 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
9318 vmaxvq_s32 (int32x4_t a
)
9321 __asm__ ("smaxv %s0,%1.4s"
9324 : /* No clobbers */);
/* vmaxvq_u8: emits the across-lanes UMAXV reduction of a (.16b) into a
   B-register scalar; declared to return uint8_t.  */
9328 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
9329 vmaxvq_u8 (uint8x16_t a
)
9332 __asm__ ("umaxv %b0,%1.16b"
9335 : /* No clobbers */);
/* vmaxvq_u16: emits the across-lanes UMAXV reduction of a (.8h) into an
   H-register scalar; declared to return uint16_t.  */
9339 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
9340 vmaxvq_u16 (uint16x8_t a
)
9343 __asm__ ("umaxv %h0,%1.8h"
9346 : /* No clobbers */);
/* vmaxvq_u32: emits the across-lanes UMAXV reduction of a (.4s) into an
   S-register scalar; declared to return uint32_t.  */
9350 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
9351 vmaxvq_u32 (uint32x4_t a
)
9354 __asm__ ("umaxv %s0,%1.4s"
9357 : /* No clobbers */);
/* vminnmvq_f32: emits the across-lanes FMINNMV reduction of a (.4s) into an
   S-register scalar; declared to return float32_t.  */
9361 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
9362 vminnmvq_f32 (float32x4_t a
)
9365 __asm__ ("fminnmv %s0,%1.4s"
9368 : /* No clobbers */);
/* vminv_s8: emits the across-lanes SMINV reduction of a (.8b) into a
   B-register scalar; declared to return int8_t.  */
9372 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
9373 vminv_s8 (int8x8_t a
)
9376 __asm__ ("sminv %b0,%1.8b"
9379 : /* No clobbers */);
/* vminv_s16: emits the across-lanes SMINV reduction of a (.4h) into an
   H-register scalar; declared to return int16_t.  */
9383 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
9384 vminv_s16 (int16x4_t a
)
9387 __asm__ ("sminv %h0,%1.4h"
9390 : /* No clobbers */);
/* vminv_u8: emits the across-lanes UMINV reduction of a (.8b) into a
   B-register scalar; declared to return uint8_t.  */
9394 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
9395 vminv_u8 (uint8x8_t a
)
9398 __asm__ ("uminv %b0,%1.8b"
9401 : /* No clobbers */);
/* vminv_u16: emits the across-lanes UMINV reduction of a (.4h) into an
   H-register scalar; declared to return uint16_t.  */
9405 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
9406 vminv_u16 (uint16x4_t a
)
9409 __asm__ ("uminv %h0,%1.4h"
9412 : /* No clobbers */);
/* vminvq_f32: emits the across-lanes FMINV reduction of a (.4s) into an
   S-register scalar; declared to return float32_t.  */
9416 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
9417 vminvq_f32 (float32x4_t a
)
9420 __asm__ ("fminv %s0,%1.4s"
9423 : /* No clobbers */);
/* vminvq_s8: emits the across-lanes SMINV reduction of a (.16b) into a
   B-register scalar; declared to return int8_t.  */
9427 __extension__
static __inline
int8_t __attribute__ ((__always_inline__
))
9428 vminvq_s8 (int8x16_t a
)
9431 __asm__ ("sminv %b0,%1.16b"
9434 : /* No clobbers */);
/* vminvq_s16: emits the across-lanes SMINV reduction of a (.8h) into an
   H-register scalar; declared to return int16_t.  */
9438 __extension__
static __inline
int16_t __attribute__ ((__always_inline__
))
9439 vminvq_s16 (int16x8_t a
)
9442 __asm__ ("sminv %h0,%1.8h"
9445 : /* No clobbers */);
/* vminvq_s32: emits the across-lanes SMINV reduction of a (.4s) into an
   S-register scalar; declared to return int32_t.  */
9449 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
9450 vminvq_s32 (int32x4_t a
)
9453 __asm__ ("sminv %s0,%1.4s"
9456 : /* No clobbers */);
/* vminvq_u8: emits the across-lanes UMINV reduction of a (.16b) into a
   B-register scalar; declared to return uint8_t.  */
9460 __extension__
static __inline
uint8_t __attribute__ ((__always_inline__
))
9461 vminvq_u8 (uint8x16_t a
)
9464 __asm__ ("uminv %b0,%1.16b"
9467 : /* No clobbers */);
/* vminvq_u16: emits the across-lanes UMINV reduction of a (.8h) into an
   H-register scalar; declared to return uint16_t.  */
9471 __extension__
static __inline
uint16_t __attribute__ ((__always_inline__
))
9472 vminvq_u16 (uint16x8_t a
)
9475 __asm__ ("uminv %h0,%1.8h"
9478 : /* No clobbers */);
/* vminvq_u32: emits the across-lanes UMINV reduction of a (.4s) into an
   S-register scalar; declared to return uint32_t.  */
9482 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
9483 vminvq_u32 (uint32x4_t a
)
9486 __asm__ ("uminv %s0,%1.4s"
9489 : /* No clobbers */);
/* vmla_lane_f32 (a, b, c, d): two-instruction sequence -- FMUL of b by .s
   lane d of c into the scratch output t1, then FADD of t1 into the
   accumulator (operand 0, tied to a).  d must be an immediate ("i").  */
9493 #define vmla_lane_f32(a, b, c, d) \
9496 float32x2_t c_ = (c); \
9497 float32x2_t b_ = (b); \
9498 float32x2_t a_ = (a); \
9499 float32x2_t result; \
9501 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
9502 : "=w"(result), "=w"(t1) \
9503 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9504 : /* No clobbers */); \
/* vmla_lane_s16 (a, b, c, d): MLA (by element) on .4h -- accumulates b times
   .h lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  The 16-bit by-element encoding can only name V0-V15
   for the indexed register, so c_ uses the "x" constraint, not "w".  */
9508 #define vmla_lane_s16(a, b, c, d) \
9511 int16x4_t c_ = (c); \
9512 int16x4_t b_ = (b); \
9513 int16x4_t a_ = (a); \
9515 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9517 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9518 : /* No clobbers */); \
/* vmla_lane_s32 (a, b, c, d): MLA (by element) on .2s -- accumulates b times
   .s lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  */
9522 #define vmla_lane_s32(a, b, c, d) \
9525 int32x2_t c_ = (c); \
9526 int32x2_t b_ = (b); \
9527 int32x2_t a_ = (a); \
9529 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9531 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9532 : /* No clobbers */); \
/* vmla_lane_u16 (a, b, c, d): MLA (by element) on .4h -- accumulates b times
   .h lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  The 16-bit by-element encoding can only name V0-V15
   for the indexed register, so c_ uses the "x" constraint, not "w".  */
9536 #define vmla_lane_u16(a, b, c, d) \
9539 uint16x4_t c_ = (c); \
9540 uint16x4_t b_ = (b); \
9541 uint16x4_t a_ = (a); \
9542 uint16x4_t result; \
9543 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9545 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9546 : /* No clobbers */); \
/* vmla_lane_u32 (a, b, c, d): MLA (by element) on .2s -- accumulates b times
   .s lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  */
9550 #define vmla_lane_u32(a, b, c, d) \
9553 uint32x2_t c_ = (c); \
9554 uint32x2_t b_ = (b); \
9555 uint32x2_t a_ = (a); \
9556 uint32x2_t result; \
9557 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9559 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9560 : /* No clobbers */); \
/* vmla_laneq_s16 (a, b, c, d): MLA (by element) on .4h with the lane taken
   from a 128-bit int16x8_t c -- accumulates b times .h lane d of c into
   operand 0, which is tied to a.  d must be an immediate ("i").  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so
   c_ uses the "x" constraint, not "w".  */
9564 #define vmla_laneq_s16(a, b, c, d) \
9567 int16x8_t c_ = (c); \
9568 int16x4_t b_ = (b); \
9569 int16x4_t a_ = (a); \
9571 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9573 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9574 : /* No clobbers */); \
/* vmla_laneq_s32 (a, b, c, d): MLA (by element) on .2s with the lane taken
   from a 128-bit int32x4_t c -- accumulates b times .s lane d of c into
   operand 0, which is tied to a.  d must be an immediate ("i").  */
9578 #define vmla_laneq_s32(a, b, c, d) \
9581 int32x4_t c_ = (c); \
9582 int32x2_t b_ = (b); \
9583 int32x2_t a_ = (a); \
9585 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9587 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9588 : /* No clobbers */); \
/* vmla_laneq_u16 (a, b, c, d): MLA (by element) on .4h with the lane taken
   from a 128-bit uint16x8_t c -- accumulates b times .h lane d of c into
   operand 0, which is tied to a.  d must be an immediate ("i").  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so
   c_ uses the "x" constraint, not "w".  */
9592 #define vmla_laneq_u16(a, b, c, d) \
9595 uint16x8_t c_ = (c); \
9596 uint16x4_t b_ = (b); \
9597 uint16x4_t a_ = (a); \
9598 uint16x4_t result; \
9599 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9601 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9602 : /* No clobbers */); \
/* vmla_laneq_u32 (a, b, c, d): MLA (by element) on .2s with the lane taken
   from a 128-bit uint32x4_t c -- accumulates b times .s lane d of c into
   operand 0, which is tied to a.  d must be an immediate ("i").  */
9606 #define vmla_laneq_u32(a, b, c, d) \
9609 uint32x4_t c_ = (c); \
9610 uint32x2_t b_ = (b); \
9611 uint32x2_t a_ = (a); \
9612 uint32x2_t result; \
9613 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9615 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9616 : /* No clobbers */); \
/* vmla_n_f32: two-instruction sequence -- FMUL of b by lane 0 of the register
   holding scalar c into the scratch output t1, then FADD of t1 into the
   accumulator (operand 0, tied to a).  Declared to return float32x2_t.  */
9620 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
9621 vmla_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
9625 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
9626 : "=w"(result
), "=w"(t1
)
9627 : "0"(a
), "w"(b
), "w"(c
)
9628 : /* No clobbers */);
/* vmla_n_s16: MLA (by element) on .4h -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return int16x4_t.  The 16-bit by-element encoding can only name
   V0-V15 for the indexed register, so c uses the "x" constraint, not "w".  */
9632 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
9633 vmla_n_s16 (int16x4_t a
, int16x4_t b
, int16_t c
)
9636 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9638 : "0"(a
), "w"(b
), "x"(c
)
9639 : /* No clobbers */);
/* vmla_n_s32: MLA (by element) on .2s -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return int32x2_t.  */
9643 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
9644 vmla_n_s32 (int32x2_t a
, int32x2_t b
, int32_t c
)
9647 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
9649 : "0"(a
), "w"(b
), "w"(c
)
9650 : /* No clobbers */);
/* vmla_n_u16: MLA (by element) on .4h -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return uint16x4_t.  The 16-bit by-element encoding can only name
   V0-V15 for the indexed register, so c uses the "x" constraint, not "w".  */
9654 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
9655 vmla_n_u16 (uint16x4_t a
, uint16x4_t b
, uint16_t c
)
9658 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9660 : "0"(a
), "w"(b
), "x"(c
)
9661 : /* No clobbers */);
/* vmla_n_u32: MLA (by element) on .2s -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return uint32x2_t.  */
9665 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
9666 vmla_n_u32 (uint32x2_t a
, uint32x2_t b
, uint32_t c
)
9669 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
9671 : "0"(a
), "w"(b
), "w"(c
)
9672 : /* No clobbers */);
/* vmla_s8: vector MLA on .8b -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return int8x8_t.  */
9676 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
9677 vmla_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
9680 __asm__ ("mla %0.8b, %2.8b, %3.8b"
9682 : "0"(a
), "w"(b
), "w"(c
)
9683 : /* No clobbers */);
/* vmla_s16: vector MLA on .4h -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return int16x4_t.  */
9687 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
9688 vmla_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
9691 __asm__ ("mla %0.4h, %2.4h, %3.4h"
9693 : "0"(a
), "w"(b
), "w"(c
)
9694 : /* No clobbers */);
/* vmla_s32: vector MLA on .2s -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return int32x2_t.  */
9698 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
9699 vmla_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
9702 __asm__ ("mla %0.2s, %2.2s, %3.2s"
9704 : "0"(a
), "w"(b
), "w"(c
)
9705 : /* No clobbers */);
/* vmla_u8: vector MLA on .8b -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return uint8x8_t.  */
9709 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
9710 vmla_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
9713 __asm__ ("mla %0.8b, %2.8b, %3.8b"
9715 : "0"(a
), "w"(b
), "w"(c
)
9716 : /* No clobbers */);
/* vmla_u16: vector MLA on .4h -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return uint16x4_t.  */
9720 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
9721 vmla_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
9724 __asm__ ("mla %0.4h, %2.4h, %3.4h"
9726 : "0"(a
), "w"(b
), "w"(c
)
9727 : /* No clobbers */);
/* vmla_u32: vector MLA on .2s -- accumulates b times c into operand 0, which
   is tied to a.  Declared to return uint32x2_t.  */
9731 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
9732 vmla_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
9735 __asm__ ("mla %0.2s, %2.2s, %3.2s"
9737 : "0"(a
), "w"(b
), "w"(c
)
9738 : /* No clobbers */);
/* vmlal_high_lane_s16 (a, b, c, d): SMLAL2 (by element) -- widening
   multiply-accumulate of b (.8h form) by .h lane d of c into the .4s
   accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".
   NOTE(review): c_ is declared int16x8_t, the same as the _laneq variant --
   confirm the intended lane-source width against ACLE.  */
9742 #define vmlal_high_lane_s16(a, b, c, d) \
9745 int16x8_t c_ = (c); \
9746 int16x8_t b_ = (b); \
9747 int32x4_t a_ = (a); \
9749 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
9751 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9752 : /* No clobbers */); \
/* vmlal_high_lane_s32 (a, b, c, d): SMLAL2 (by element) -- widening
   multiply-accumulate of b (.4s form) by .s lane d of c into the .2d
   accumulator, operand 0, tied to a.  d must be an immediate ("i").
   NOTE(review): c_ is declared int32x4_t, the same as the _laneq variant --
   confirm the intended lane-source width against ACLE.  */
9756 #define vmlal_high_lane_s32(a, b, c, d) \
9759 int32x4_t c_ = (c); \
9760 int32x4_t b_ = (b); \
9761 int64x2_t a_ = (a); \
9763 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
9765 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9766 : /* No clobbers */); \
/* vmlal_high_lane_u16 (a, b, c, d): UMLAL2 (by element) -- widening
   multiply-accumulate of b (.8h form) by .h lane d of c into the .4s
   accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".
   NOTE(review): c_ is declared uint16x8_t, the same as the _laneq variant --
   confirm the intended lane-source width against ACLE.  */
9770 #define vmlal_high_lane_u16(a, b, c, d) \
9773 uint16x8_t c_ = (c); \
9774 uint16x8_t b_ = (b); \
9775 uint32x4_t a_ = (a); \
9776 uint32x4_t result; \
9777 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
9779 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9780 : /* No clobbers */); \
/* vmlal_high_lane_u32 (a, b, c, d): UMLAL2 (by element) -- widening
   multiply-accumulate of b (.4s form) by .s lane d of c into the .2d
   accumulator, operand 0, tied to a.  d must be an immediate ("i").
   NOTE(review): c_ is declared uint32x4_t, the same as the _laneq variant --
   confirm the intended lane-source width against ACLE.  */
9784 #define vmlal_high_lane_u32(a, b, c, d) \
9787 uint32x4_t c_ = (c); \
9788 uint32x4_t b_ = (b); \
9789 uint64x2_t a_ = (a); \
9790 uint64x2_t result; \
9791 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
9793 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9794 : /* No clobbers */); \
/* vmlal_high_laneq_s16 (a, b, c, d): SMLAL2 (by element) -- widening
   multiply-accumulate of b (.8h form) by .h lane d of the int16x8_t c into
   the .4s accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".  */
9798 #define vmlal_high_laneq_s16(a, b, c, d) \
9801 int16x8_t c_ = (c); \
9802 int16x8_t b_ = (b); \
9803 int32x4_t a_ = (a); \
9805 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
9807 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9808 : /* No clobbers */); \
/* vmlal_high_laneq_s32 (a, b, c, d): SMLAL2 (by element) -- widening
   multiply-accumulate of b (.4s form) by .s lane d of the int32x4_t c into
   the .2d accumulator, operand 0, tied to a.  d must be an immediate ("i").  */
9812 #define vmlal_high_laneq_s32(a, b, c, d) \
9815 int32x4_t c_ = (c); \
9816 int32x4_t b_ = (b); \
9817 int64x2_t a_ = (a); \
9819 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
9821 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9822 : /* No clobbers */); \
/* vmlal_high_laneq_u16 (a, b, c, d): UMLAL2 (by element) -- widening
   multiply-accumulate of b (.8h form) by .h lane d of the uint16x8_t c into
   the .4s accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".  */
9826 #define vmlal_high_laneq_u16(a, b, c, d) \
9829 uint16x8_t c_ = (c); \
9830 uint16x8_t b_ = (b); \
9831 uint32x4_t a_ = (a); \
9832 uint32x4_t result; \
9833 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
9835 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9836 : /* No clobbers */); \
/* vmlal_high_laneq_u32 (a, b, c, d): UMLAL2 (by element) -- widening
   multiply-accumulate of b (.4s form) by .s lane d of the uint32x4_t c into
   the .2d accumulator, operand 0, tied to a.  d must be an immediate ("i").  */
9840 #define vmlal_high_laneq_u32(a, b, c, d) \
9843 uint32x4_t c_ = (c); \
9844 uint32x4_t b_ = (b); \
9845 uint64x2_t a_ = (a); \
9846 uint64x2_t result; \
9847 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
9849 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9850 : /* No clobbers */); \
/* vmlal_high_n_s16: SMLAL2 (by element) -- widening multiply-accumulate of b
   (.8h form) by lane 0 of the register holding scalar c into the .4s
   accumulator, operand 0, tied to a.  Declared to return int32x4_t.
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c uses the "x" constraint, not "w".  */
9854 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9855 vmlal_high_n_s16 (int32x4_t a
, int16x8_t b
, int16_t c
)
9858 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
9860 : "0"(a
), "w"(b
), "x"(c
)
9861 : /* No clobbers */);
/* vmlal_high_n_s32: SMLAL2 (by element) -- widening multiply-accumulate of b
   (.4s form) by lane 0 of the register holding scalar c into the .2d
   accumulator, operand 0, tied to a.  Declared to return int64x2_t.  */
9865 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9866 vmlal_high_n_s32 (int64x2_t a
, int32x4_t b
, int32_t c
)
9869 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
9871 : "0"(a
), "w"(b
), "w"(c
)
9872 : /* No clobbers */);
/* vmlal_high_n_u16: UMLAL2 (by element) -- widening multiply-accumulate of b
   (.8h form) by lane 0 of the register holding scalar c into the .4s
   accumulator, operand 0, tied to a.  Declared to return uint32x4_t.
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c uses the "x" constraint, not "w".  */
9876 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9877 vmlal_high_n_u16 (uint32x4_t a
, uint16x8_t b
, uint16_t c
)
9880 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
9882 : "0"(a
), "w"(b
), "x"(c
)
9883 : /* No clobbers */);
/* vmlal_high_n_u32: UMLAL2 (by element) -- widening multiply-accumulate of b
   (.4s form) by lane 0 of the register holding scalar c into the .2d
   accumulator, operand 0, tied to a.  Declared to return uint64x2_t.  */
9887 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9888 vmlal_high_n_u32 (uint64x2_t a
, uint32x4_t b
, uint32_t c
)
9891 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
9893 : "0"(a
), "w"(b
), "w"(c
)
9894 : /* No clobbers */);
/* vmlal_high_s8: SMLAL2 -- widening multiply-accumulate of b and c (.16b
   form) into the .8h accumulator, operand 0, tied to a.  Declared to return
   int16x8_t.  */
9898 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
9899 vmlal_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
9902 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
9904 : "0"(a
), "w"(b
), "w"(c
)
9905 : /* No clobbers */);
/* vmlal_high_s16: SMLAL2 -- widening multiply-accumulate of b and c (.8h
   form) into the .4s accumulator, operand 0, tied to a.  Declared to return
   int32x4_t.  */
9909 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
9910 vmlal_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
9913 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
9915 : "0"(a
), "w"(b
), "w"(c
)
9916 : /* No clobbers */);
/* vmlal_high_s32: SMLAL2 -- widening multiply-accumulate of b and c (.4s
   form) into the .2d accumulator, operand 0, tied to a.  Declared to return
   int64x2_t.  */
9920 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
9921 vmlal_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
9924 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
9926 : "0"(a
), "w"(b
), "w"(c
)
9927 : /* No clobbers */);
/* vmlal_high_u8: UMLAL2 -- widening multiply-accumulate of b and c (.16b
   form) into the .8h accumulator, operand 0, tied to a.  Declared to return
   uint16x8_t.  */
9931 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
9932 vmlal_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
9935 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
9937 : "0"(a
), "w"(b
), "w"(c
)
9938 : /* No clobbers */);
/* vmlal_high_u16: UMLAL2 -- widening multiply-accumulate of b and c (.8h
   form) into the .4s accumulator, operand 0, tied to a.  Declared to return
   uint32x4_t.  */
9942 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
9943 vmlal_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
9946 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
9948 : "0"(a
), "w"(b
), "w"(c
)
9949 : /* No clobbers */);
/* vmlal_high_u32: UMLAL2 -- widening multiply-accumulate of b and c (.4s
   form) into the .2d accumulator, operand 0, tied to a.  Declared to return
   uint64x2_t.  */
9953 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
9954 vmlal_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
9957 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
9959 : "0"(a
), "w"(b
), "w"(c
)
9960 : /* No clobbers */);
/* vmlal_lane_s16 (a, b, c, d): SMLAL (by element) -- widening
   multiply-accumulate of b (.4h) by .h lane d of c into the .4s accumulator,
   operand 0, tied to a.  d must be an immediate ("i").  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so
   c_ uses the "x" constraint, not "w".  */
9964 #define vmlal_lane_s16(a, b, c, d) \
9967 int16x4_t c_ = (c); \
9968 int16x4_t b_ = (b); \
9969 int32x4_t a_ = (a); \
9971 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
9973 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
9974 : /* No clobbers */); \
/* vmlal_lane_s32 (a, b, c, d): SMLAL (by element) -- widening
   multiply-accumulate of b (.2s) by .s lane d of c into the .2d accumulator,
   operand 0, tied to a.  d must be an immediate ("i").  */
9978 #define vmlal_lane_s32(a, b, c, d) \
9981 int32x2_t c_ = (c); \
9982 int32x2_t b_ = (b); \
9983 int64x2_t a_ = (a); \
9985 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
9987 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9988 : /* No clobbers */); \
/* vmlal_lane_u16 (a, b, c, d): UMLAL (by element) -- widening
   multiply-accumulate of b (.4h) by .h lane d of c into the .4s accumulator,
   operand 0, tied to a.  d must be an immediate ("i").  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so
   c_ uses the "x" constraint, not "w".  */
9992 #define vmlal_lane_u16(a, b, c, d) \
9995 uint16x4_t c_ = (c); \
9996 uint16x4_t b_ = (b); \
9997 uint32x4_t a_ = (a); \
9998 uint32x4_t result; \
9999 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
10001 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10002 : /* No clobbers */); \
/* vmlal_lane_u32 (a, b, c, d): UMLAL (by element) -- widening
   multiply-accumulate of b (.2s) by .s lane d of c into the .2d accumulator,
   operand 0, tied to a.  d must be an immediate ("i").  */
10006 #define vmlal_lane_u32(a, b, c, d) \
10009 uint32x2_t c_ = (c); \
10010 uint32x2_t b_ = (b); \
10011 uint64x2_t a_ = (a); \
10012 uint64x2_t result; \
10013 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
10015 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10016 : /* No clobbers */); \
/* vmlal_laneq_s16 (a, b, c, d): SMLAL (by element) -- widening
   multiply-accumulate of b (.4h) by .h lane d of the int16x8_t c into the
   .4s accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".  */
10020 #define vmlal_laneq_s16(a, b, c, d) \
10023 int16x8_t c_ = (c); \
10024 int16x4_t b_ = (b); \
10025 int32x4_t a_ = (a); \
10026 int32x4_t result; \
10027 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
10029 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10030 : /* No clobbers */); \
/* vmlal_laneq_s32 (a, b, c, d): SMLAL (by element) -- widening
   multiply-accumulate of b (.2s) by .s lane d of the int32x4_t c into the
   .2d accumulator, operand 0, tied to a.  d must be an immediate ("i").  */
10034 #define vmlal_laneq_s32(a, b, c, d) \
10037 int32x4_t c_ = (c); \
10038 int32x2_t b_ = (b); \
10039 int64x2_t a_ = (a); \
10040 int64x2_t result; \
10041 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
10043 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10044 : /* No clobbers */); \
/* vmlal_laneq_u16 (a, b, c, d): UMLAL (by element) -- widening
   multiply-accumulate of b (.4h) by .h lane d of the uint16x8_t c into the
   .4s accumulator, operand 0, tied to a.  d must be an immediate ("i").
   The 16-bit by-element encoding can only name V0-V15 for the indexed
   register, so c_ uses the "x" constraint, not "w".  */
10048 #define vmlal_laneq_u16(a, b, c, d) \
10051 uint16x8_t c_ = (c); \
10052 uint16x4_t b_ = (b); \
10053 uint32x4_t a_ = (a); \
10054 uint32x4_t result; \
10055 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
10057 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10058 : /* No clobbers */); \
/* vmlal_laneq_u32 (a, b, c, d): UMLAL (by element) -- widening
   multiply-accumulate of b (.2s) by .s lane d of the uint32x4_t c into the
   .2d accumulator, operand 0, tied to a.  d must be an immediate ("i").  */
10062 #define vmlal_laneq_u32(a, b, c, d) \
10065 uint32x4_t c_ = (c); \
10066 uint32x2_t b_ = (b); \
10067 uint64x2_t a_ = (a); \
10068 uint64x2_t result; \
10069 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
10071 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10072 : /* No clobbers */); \
/* vmlal_n_s16: SMLAL (by element) -- widening multiply-accumulate of b (.4h)
   by lane 0 of the register holding scalar c into the .4s accumulator,
   operand 0, tied to a.  Declared to return int32x4_t.  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so c
   uses the "x" constraint, not "w".  */
10076 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10077 vmlal_n_s16 (int32x4_t a
, int16x4_t b
, int16_t c
)
10080 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
10082 : "0"(a
), "w"(b
), "x"(c
)
10083 : /* No clobbers */);
/* vmlal_n_s32: SMLAL (by element) -- widening multiply-accumulate of b (.2s)
   by lane 0 of the register holding scalar c into the .2d accumulator,
   operand 0, tied to a.  Declared to return int64x2_t.  */
10087 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10088 vmlal_n_s32 (int64x2_t a
, int32x2_t b
, int32_t c
)
10091 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
10093 : "0"(a
), "w"(b
), "w"(c
)
10094 : /* No clobbers */);
/* vmlal_n_u16: UMLAL (by element) -- widening multiply-accumulate of b (.4h)
   by lane 0 of the register holding scalar c into the .4s accumulator,
   operand 0, tied to a.  Declared to return uint32x4_t.  The 16-bit
   by-element encoding can only name V0-V15 for the indexed register, so c
   uses the "x" constraint, not "w".  */
10098 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10099 vmlal_n_u16 (uint32x4_t a
, uint16x4_t b
, uint16_t c
)
10102 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
10104 : "0"(a
), "w"(b
), "x"(c
)
10105 : /* No clobbers */);
/* vmlal_n_u32: UMLAL (by element) -- widening multiply-accumulate of b (.2s)
   by lane 0 of the register holding scalar c into the .2d accumulator,
   operand 0, tied to a.  Declared to return uint64x2_t.  */
10109 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10110 vmlal_n_u32 (uint64x2_t a
, uint32x2_t b
, uint32_t c
)
10113 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
10115 : "0"(a
), "w"(b
), "w"(c
)
10116 : /* No clobbers */);
/* vmlal_s8: SMLAL -- widening multiply-accumulate of b and c (.8b) into the
   .8h accumulator, operand 0, tied to a.  Declared to return int16x8_t.  */
10120 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10121 vmlal_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
10124 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
10126 : "0"(a
), "w"(b
), "w"(c
)
10127 : /* No clobbers */);
/* vmlal_s16: SMLAL -- widening multiply-accumulate of b and c (.4h) into the
   .4s accumulator, operand 0, tied to a.  Declared to return int32x4_t.  */
10131 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10132 vmlal_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
10135 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
10137 : "0"(a
), "w"(b
), "w"(c
)
10138 : /* No clobbers */);
/* vmlal_s32: SMLAL -- widening multiply-accumulate of b and c (.2s) into the
   .2d accumulator, operand 0, tied to a.  Declared to return int64x2_t.  */
10142 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10143 vmlal_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
10146 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
10148 : "0"(a
), "w"(b
), "w"(c
)
10149 : /* No clobbers */);
/* vmlal_u8: UMLAL -- widening multiply-accumulate of b and c (.8b) into the
   .8h accumulator, operand 0, tied to a.  Declared to return uint16x8_t.  */
10153 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10154 vmlal_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
10157 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
10159 : "0"(a
), "w"(b
), "w"(c
)
10160 : /* No clobbers */);
/* vmlal_u16: UMLAL -- widening multiply-accumulate of b and c (.4h) into the
   .4s accumulator, operand 0, tied to a.  Declared to return uint32x4_t.  */
10164 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10165 vmlal_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
10168 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
10170 : "0"(a
), "w"(b
), "w"(c
)
10171 : /* No clobbers */);
/* vmlal_u32: UMLAL -- widening multiply-accumulate of b and c (.2s) into the
   .2d accumulator, operand 0, tied to a.  Declared to return uint64x2_t.  */
10175 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10176 vmlal_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
10179 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
10181 : "0"(a
), "w"(b
), "w"(c
)
10182 : /* No clobbers */);
/* vmlaq_lane_f32 (a, b, c, d): two-instruction sequence -- FMUL of b by .s
   lane d of c into the scratch output t1, then FADD of t1 into the
   accumulator (operand 0, tied to a), all on .4s.  d must be an
   immediate ("i").  */
10186 #define vmlaq_lane_f32(a, b, c, d) \
10189 float32x4_t c_ = (c); \
10190 float32x4_t b_ = (b); \
10191 float32x4_t a_ = (a); \
10192 float32x4_t result; \
10194 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
10195 : "=w"(result), "=w"(t1) \
10196 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10197 : /* No clobbers */); \
/* vmlaq_lane_s16 (a, b, c, d): MLA (by element) on .8h -- accumulates b times
   .h lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  The 16-bit by-element encoding can only name V0-V15
   for the indexed register, so c_ uses the "x" constraint, not "w".  */
10201 #define vmlaq_lane_s16(a, b, c, d) \
10204 int16x8_t c_ = (c); \
10205 int16x8_t b_ = (b); \
10206 int16x8_t a_ = (a); \
10207 int16x8_t result; \
10208 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10210 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10211 : /* No clobbers */); \
/* vmlaq_lane_s32 (a, b, c, d): MLA (by element) on .4s -- accumulates b times
   .s lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  */
10215 #define vmlaq_lane_s32(a, b, c, d) \
10218 int32x4_t c_ = (c); \
10219 int32x4_t b_ = (b); \
10220 int32x4_t a_ = (a); \
10221 int32x4_t result; \
10222 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10224 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10225 : /* No clobbers */); \
/* vmlaq_lane_u16 (a, b, c, d): MLA (by element) on .8h -- accumulates b times
   .h lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  The 16-bit by-element encoding can only name V0-V15
   for the indexed register, so c_ uses the "x" constraint, not "w".  */
10229 #define vmlaq_lane_u16(a, b, c, d) \
10232 uint16x8_t c_ = (c); \
10233 uint16x8_t b_ = (b); \
10234 uint16x8_t a_ = (a); \
10235 uint16x8_t result; \
10236 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10238 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10239 : /* No clobbers */); \
/* vmlaq_lane_u32 (a, b, c, d): MLA (by element) on .4s -- accumulates b times
   .s lane d of c into operand 0, which is tied to a.  d must be an
   immediate ("i").  */
10243 #define vmlaq_lane_u32(a, b, c, d) \
10246 uint32x4_t c_ = (c); \
10247 uint32x4_t b_ = (b); \
10248 uint32x4_t a_ = (a); \
10249 uint32x4_t result; \
10250 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10252 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10253 : /* No clobbers */); \
/* vmlaq_laneq_s16 (a, b, c, d): MLA (by element) on .8h -- accumulates b
   times .h lane d of the int16x8_t c into operand 0, which is tied to a.
   d must be an immediate ("i").  The 16-bit by-element encoding can only
   name V0-V15 for the indexed register, so c_ uses the "x" constraint,
   not "w".  */
10257 #define vmlaq_laneq_s16(a, b, c, d) \
10260 int16x8_t c_ = (c); \
10261 int16x8_t b_ = (b); \
10262 int16x8_t a_ = (a); \
10263 int16x8_t result; \
10264 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10266 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10267 : /* No clobbers */); \
/* vmlaq_laneq_s32 (a, b, c, d): MLA (by element) on .4s -- accumulates b
   times .s lane d of the int32x4_t c into operand 0, which is tied to a.
   d must be an immediate ("i").  */
10271 #define vmlaq_laneq_s32(a, b, c, d) \
10274 int32x4_t c_ = (c); \
10275 int32x4_t b_ = (b); \
10276 int32x4_t a_ = (a); \
10277 int32x4_t result; \
10278 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10280 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10281 : /* No clobbers */); \
/* vmlaq_laneq_u16 (a, b, c, d): MLA (by element) on .8h -- accumulates b
   times .h lane d of the uint16x8_t c into operand 0, which is tied to a.
   d must be an immediate ("i").  The 16-bit by-element encoding can only
   name V0-V15 for the indexed register, so c_ uses the "x" constraint,
   not "w".  */
10285 #define vmlaq_laneq_u16(a, b, c, d) \
10288 uint16x8_t c_ = (c); \
10289 uint16x8_t b_ = (b); \
10290 uint16x8_t a_ = (a); \
10291 uint16x8_t result; \
10292 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10294 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10295 : /* No clobbers */); \
/* vmlaq_laneq_u32 (a, b, c, d): MLA (by element) on .4s -- accumulates b
   times .s lane d of the uint32x4_t c into operand 0, which is tied to a.
   d must be an immediate ("i").  */
10299 #define vmlaq_laneq_u32(a, b, c, d) \
10302 uint32x4_t c_ = (c); \
10303 uint32x4_t b_ = (b); \
10304 uint32x4_t a_ = (a); \
10305 uint32x4_t result; \
10306 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10308 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10309 : /* No clobbers */); \
/* vmlaq_n_f32: two-instruction sequence on .4s -- FMUL of b by lane 0 of the
   register holding scalar c into the scratch output t1, then FADD of t1
   into result (operand 0, tied to a).  Declared to return float32x4_t.  */
10313 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
10314 vmlaq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
10316 float32x4_t result
;
10318 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
10319 : "=w"(result
), "=w"(t1
)
10320 : "0"(a
), "w"(b
), "w"(c
)
10321 : /* No clobbers */);
/* vmlaq_n_f64: two-instruction sequence on .2d -- FMUL of b by lane 0 of the
   register holding scalar c into the scratch output t1, then FADD of t1
   into result (operand 0, tied to a).  Declared to return float64x2_t.  */
10325 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
10326 vmlaq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
10328 float64x2_t result
;
10330 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
10331 : "=w"(result
), "=w"(t1
)
10332 : "0"(a
), "w"(b
), "w"(c
)
10333 : /* No clobbers */);
/* vmlaq_n_s16: MLA (by element) on .8h -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return int16x8_t.  The 16-bit by-element encoding can only name
   V0-V15 for the indexed register, so c uses the "x" constraint, not "w".  */
10337 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10338 vmlaq_n_s16 (int16x8_t a
, int16x8_t b
, int16_t c
)
10341 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10343 : "0"(a
), "w"(b
), "x"(c
)
10344 : /* No clobbers */);
/* vmlaq_n_s32: MLA (by element) on .4s -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return int32x4_t.  */
10348 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10349 vmlaq_n_s32 (int32x4_t a
, int32x4_t b
, int32_t c
)
10352 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10354 : "0"(a
), "w"(b
), "w"(c
)
10355 : /* No clobbers */);
/* vmlaq_n_u16: MLA (by element) on .8h -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return uint16x8_t.  The 16-bit by-element encoding can only name
   V0-V15 for the indexed register, so c uses the "x" constraint, not "w".  */
10359 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10360 vmlaq_n_u16 (uint16x8_t a
, uint16x8_t b
, uint16_t c
)
10363 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10365 : "0"(a
), "w"(b
), "x"(c
)
10366 : /* No clobbers */);
/* vmlaq_n_u32: MLA (by element) on .4s -- accumulates b times lane 0 of the
   register holding scalar c into operand 0, which is tied to a.  Declared
   to return uint32x4_t.  */
10370 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10371 vmlaq_n_u32 (uint32x4_t a
, uint32x4_t b
, uint32_t c
)
10374 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10376 : "0"(a
), "w"(b
), "w"(c
)
10377 : /* No clobbers */);
/* vmlaq_s8: vector MLA on .16b -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return int8x16_t.  */
10381 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
10382 vmlaq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
10385 __asm__ ("mla %0.16b, %2.16b, %3.16b"
10387 : "0"(a
), "w"(b
), "w"(c
)
10388 : /* No clobbers */);
/* vmlaq_s16: vector MLA on .8h -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return int16x8_t.  */
10392 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10393 vmlaq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
10396 __asm__ ("mla %0.8h, %2.8h, %3.8h"
10398 : "0"(a
), "w"(b
), "w"(c
)
10399 : /* No clobbers */);
/* vmlaq_s32: vector MLA on .4s -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return int32x4_t.  */
10403 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10404 vmlaq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
10407 __asm__ ("mla %0.4s, %2.4s, %3.4s"
10409 : "0"(a
), "w"(b
), "w"(c
)
10410 : /* No clobbers */);
/* vmlaq_u8: vector MLA on .16b -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return uint8x16_t.  */
10414 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
10415 vmlaq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
10418 __asm__ ("mla %0.16b, %2.16b, %3.16b"
10420 : "0"(a
), "w"(b
), "w"(c
)
10421 : /* No clobbers */);
/* vmlaq_u16: vector MLA on .8h -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return uint16x8_t.  */
10425 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10426 vmlaq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
10429 __asm__ ("mla %0.8h, %2.8h, %3.8h"
10431 : "0"(a
), "w"(b
), "w"(c
)
10432 : /* No clobbers */);
/* vmlaq_u32: vector MLA on .4s -- accumulates b times c into operand 0,
   which is tied to a.  Declared to return uint32x4_t.  */
10436 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10437 vmlaq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
10440 __asm__ ("mla %0.4s, %2.4s, %3.4s"
10442 : "0"(a
), "w"(b
), "w"(c
)
10443 : /* No clobbers */);
/* vmls_lane_f32: two-instruction sequence on .2s lanes.  FMUL writes the
   product of b_ and lane d of c_ into scratch output t1 (operand 1), then
   FSUB subtracts it from operand 0, which is tied to a_.  d must be an
   immediate ("i" constraint).  */
10447 #define vmls_lane_f32(a, b, c, d) \
10450 float32x2_t c_ = (c); \
10451 float32x2_t b_ = (b); \
10452 float32x2_t a_ = (a); \
10453 float32x2_t result; \
10455 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
10456 : "=w"(result), "=w"(t1) \
10457 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10458 : /* No clobbers */); \
/* vmls_lane_s16: MLS by element on .4h lanes.  a_ is tied to operand 0
   (the accumulator), b_ is the vector source and immediate d selects the
   lane of c_.  c_ uses the "x" constraint because the by-element encoding
   for 16-bit lanes can only name V0-V15 as the indexed register.  */
10462 #define vmls_lane_s16(a, b, c, d) \
10465 int16x4_t c_ = (c); \
10466 int16x4_t b_ = (b); \
10467 int16x4_t a_ = (a); \
10468 int16x4_t result; \
10469 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
10471 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10472 : /* No clobbers */); \
/* vmls_lane_s32: MLS by element on .2s lanes.  a_ is tied to operand 0
   (the accumulator), b_ is the vector source and immediate d selects the
   lane of c_.  Each macro argument is first copied into a typed local.  */
10476 #define vmls_lane_s32(a, b, c, d) \
10479 int32x2_t c_ = (c); \
10480 int32x2_t b_ = (b); \
10481 int32x2_t a_ = (a); \
10482 int32x2_t result; \
10483 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
10485 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10486 : /* No clobbers */); \
/* vmls_lane_u16: MLS by element on .4h lanes, unsigned-typed.  a_ is tied
   to operand 0 (the accumulator), b_ is the vector source and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10490 #define vmls_lane_u16(a, b, c, d) \
10493 uint16x4_t c_ = (c); \
10494 uint16x4_t b_ = (b); \
10495 uint16x4_t a_ = (a); \
10496 uint16x4_t result; \
10497 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
10499 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10500 : /* No clobbers */); \
/* vmls_lane_u32: MLS by element on .2s lanes, unsigned-typed.  a_ is tied
   to operand 0 (the accumulator), b_ is the vector source and immediate d
   selects the lane of c_.  */
10504 #define vmls_lane_u32(a, b, c, d) \
10507 uint32x2_t c_ = (c); \
10508 uint32x2_t b_ = (b); \
10509 uint32x2_t a_ = (a); \
10510 uint32x2_t result; \
10511 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
10513 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10514 : /* No clobbers */); \
/* vmls_n_f32: two-instruction sequence on .2s lanes.  FMUL writes b times
   lane 0 of scalar c into scratch output t1 (operand 1); FSUB then
   subtracts that from operand 0, which is tied to a and returned through
   result.  */
10518 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
10519 vmls_n_f32 (float32x2_t a
, float32x2_t b
, float32_t c
)
10521 float32x2_t result
;
10523 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
10524 : "=w"(result
), "=w"(t1
)
10525 : "0"(a
), "w"(b
), "w"(c
)
10526 : /* No clobbers */);
/* vmls_n_s16: MLS on .4h lanes with scalar c read as lane 0 of operand 3.
   a is tied to operand 0 (the accumulator).  c uses the "x" constraint
   because the by-element encoding for 16-bit lanes can only name V0-V15
   as the indexed register.  */
10530 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
10531 vmls_n_s16 (int16x4_t a
, int16x4_t b
, int16_t c
)
10534 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10536 : "0"(a
), "w"(b
), "x"(c
)
10537 : /* No clobbers */);
/* vmls_n_s32: MLS on .2s lanes with scalar c read as lane 0 of operand 3.
   a is tied to operand 0 (the accumulator); b is the vector source.  */
10541 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
10542 vmls_n_s32 (int32x2_t a
, int32x2_t b
, int32_t c
)
10545 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10547 : "0"(a
), "w"(b
), "w"(c
)
10548 : /* No clobbers */);
/* vmls_n_u16: unsigned-typed MLS on .4h lanes with scalar c read as lane 0
   of operand 3.  a is tied to operand 0 (the accumulator).  c uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10552 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
10553 vmls_n_u16 (uint16x4_t a
, uint16x4_t b
, uint16_t c
)
10556 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10558 : "0"(a
), "w"(b
), "x"(c
)
10559 : /* No clobbers */);
/* vmls_n_u32: unsigned-typed MLS on .2s lanes with scalar c read as lane 0
   of operand 3.  a is tied to operand 0 (the accumulator).  */
10563 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
10564 vmls_n_u32 (uint32x2_t a
, uint32x2_t b
, uint32_t c
)
10567 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10569 : "0"(a
), "w"(b
), "w"(c
)
10570 : /* No clobbers */);
/* vmls_s8: element-wise MLS on .8b lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
10574 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
10575 vmls_s8 (int8x8_t a
, int8x8_t b
, int8x8_t c
)
10578 __asm__ ("mls %0.8b,%2.8b,%3.8b"
10580 : "0"(a
), "w"(b
), "w"(c
)
10581 : /* No clobbers */);
/* vmls_s16: element-wise MLS on .4h lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
10585 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
10586 vmls_s16 (int16x4_t a
, int16x4_t b
, int16x4_t c
)
10589 __asm__ ("mls %0.4h,%2.4h,%3.4h"
10591 : "0"(a
), "w"(b
), "w"(c
)
10592 : /* No clobbers */);
/* vmls_s32: element-wise MLS on .2s lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
10596 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
10597 vmls_s32 (int32x2_t a
, int32x2_t b
, int32x2_t c
)
10600 __asm__ ("mls %0.2s,%2.2s,%3.2s"
10602 : "0"(a
), "w"(b
), "w"(c
)
10603 : /* No clobbers */);
/* vmls_u8: unsigned-typed wrapper around the .8b MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
10607 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
10608 vmls_u8 (uint8x8_t a
, uint8x8_t b
, uint8x8_t c
)
10611 __asm__ ("mls %0.8b,%2.8b,%3.8b"
10613 : "0"(a
), "w"(b
), "w"(c
)
10614 : /* No clobbers */);
/* vmls_u16: unsigned-typed wrapper around the .4h MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
10618 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
10619 vmls_u16 (uint16x4_t a
, uint16x4_t b
, uint16x4_t c
)
10622 __asm__ ("mls %0.4h,%2.4h,%3.4h"
10624 : "0"(a
), "w"(b
), "w"(c
)
10625 : /* No clobbers */);
/* vmls_u32: unsigned-typed wrapper around the .2s MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
10629 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
10630 vmls_u32 (uint32x2_t a
, uint32x2_t b
, uint32x2_t c
)
10633 __asm__ ("mls %0.2s,%2.2s,%3.2s"
10635 : "0"(a
), "w"(b
), "w"(c
)
10636 : /* No clobbers */);
/* vmlsl_high_lane_s16: SMLSL2 by element, .8h source into a .4s
   accumulator.  a_ is tied to operand 0, b_ is the vector source and
   immediate d selects the lane of c_.  c_ uses the "x" constraint because
   the by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10640 #define vmlsl_high_lane_s16(a, b, c, d) \
10643 int16x8_t c_ = (c); \
10644 int16x8_t b_ = (b); \
10645 int32x4_t a_ = (a); \
10646 int32x4_t result; \
10647 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
10649 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10650 : /* No clobbers */); \
/* vmlsl_high_lane_s32: SMLSL2 by element, .4s source into a .2d
   accumulator.  a_ is tied to operand 0, b_ is the vector source and
   immediate d selects the lane of c_.  */
10654 #define vmlsl_high_lane_s32(a, b, c, d) \
10657 int32x4_t c_ = (c); \
10658 int32x4_t b_ = (b); \
10659 int64x2_t a_ = (a); \
10660 int64x2_t result; \
10661 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
10663 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10664 : /* No clobbers */); \
/* vmlsl_high_lane_u16: UMLSL2 by element, .8h source into a .4s
   accumulator.  a_ is tied to operand 0, b_ is the vector source and
   immediate d selects the lane of c_.  c_ uses the "x" constraint because
   the by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10668 #define vmlsl_high_lane_u16(a, b, c, d) \
10671 uint16x8_t c_ = (c); \
10672 uint16x8_t b_ = (b); \
10673 uint32x4_t a_ = (a); \
10674 uint32x4_t result; \
10675 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
10677 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10678 : /* No clobbers */); \
/* vmlsl_high_lane_u32: UMLSL2 by element, .4s source into a .2d
   accumulator.  a_ is tied to operand 0, b_ is the vector source and
   immediate d selects the lane of c_.  */
10682 #define vmlsl_high_lane_u32(a, b, c, d) \
10685 uint32x4_t c_ = (c); \
10686 uint32x4_t b_ = (b); \
10687 uint64x2_t a_ = (a); \
10688 uint64x2_t result; \
10689 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
10691 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10692 : /* No clobbers */); \
/* vmlsl_high_laneq_s16: SMLSL2 by element with a 128-bit lane source,
   .8h into a .4s accumulator.  a_ is tied to operand 0 and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10696 #define vmlsl_high_laneq_s16(a, b, c, d) \
10699 int16x8_t c_ = (c); \
10700 int16x8_t b_ = (b); \
10701 int32x4_t a_ = (a); \
10702 int32x4_t result; \
10703 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
10705 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10706 : /* No clobbers */); \
/* vmlsl_high_laneq_s32: SMLSL2 by element with a 128-bit lane source,
   .4s into a .2d accumulator.  a_ is tied to operand 0 and immediate d
   selects the lane of c_.  */
10710 #define vmlsl_high_laneq_s32(a, b, c, d) \
10713 int32x4_t c_ = (c); \
10714 int32x4_t b_ = (b); \
10715 int64x2_t a_ = (a); \
10716 int64x2_t result; \
10717 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
10719 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10720 : /* No clobbers */); \
/* vmlsl_high_laneq_u16: UMLSL2 by element with a 128-bit lane source,
   .8h into a .4s accumulator.  a_ is tied to operand 0 and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10724 #define vmlsl_high_laneq_u16(a, b, c, d) \
10727 uint16x8_t c_ = (c); \
10728 uint16x8_t b_ = (b); \
10729 uint32x4_t a_ = (a); \
10730 uint32x4_t result; \
10731 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
10733 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10734 : /* No clobbers */); \
/* vmlsl_high_laneq_u32: UMLSL2 by element with a 128-bit lane source,
   .4s into a .2d accumulator.  a_ is tied to operand 0 and immediate d
   selects the lane of c_.  */
10738 #define vmlsl_high_laneq_u32(a, b, c, d) \
10741 uint32x4_t c_ = (c); \
10742 uint32x4_t b_ = (b); \
10743 uint64x2_t a_ = (a); \
10744 uint64x2_t result; \
10745 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
10747 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10748 : /* No clobbers */); \
/* vmlsl_high_n_s16: SMLSL2 with a .8h source and a .4s accumulator; scalar
   c is read as lane 0 of operand 3 and a is tied to operand 0.  c uses
   the "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10752 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10753 vmlsl_high_n_s16 (int32x4_t a
, int16x8_t b
, int16_t c
)
10756 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
10758 : "0"(a
), "w"(b
), "x"(c
)
10759 : /* No clobbers */);
/* vmlsl_high_n_s32: SMLSL2 with a .4s source and a .2d accumulator; scalar
   c is read as lane 0 of operand 3 and a is tied to operand 0.  */
10763 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10764 vmlsl_high_n_s32 (int64x2_t a
, int32x4_t b
, int32_t c
)
10767 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
10769 : "0"(a
), "w"(b
), "w"(c
)
10770 : /* No clobbers */);
/* vmlsl_high_n_u16: UMLSL2 with a .8h source and a .4s accumulator; scalar
   c is read as lane 0 of operand 3 and a is tied to operand 0.  c uses
   the "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10774 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10775 vmlsl_high_n_u16 (uint32x4_t a
, uint16x8_t b
, uint16_t c
)
10778 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
10780 : "0"(a
), "w"(b
), "x"(c
)
10781 : /* No clobbers */);
/* vmlsl_high_n_u32: UMLSL2 with a .4s source and a .2d accumulator; scalar
   c is read as lane 0 of operand 3 and a is tied to operand 0.  */
10785 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10786 vmlsl_high_n_u32 (uint64x2_t a
, uint32x4_t b
, uint32_t c
)
10789 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
10791 : "0"(a
), "w"(b
), "w"(c
)
10792 : /* No clobbers */);
/* vmlsl_high_s8: SMLSL2 taking .16b sources b and c and a .8h accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10796 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
10797 vmlsl_high_s8 (int16x8_t a
, int8x16_t b
, int8x16_t c
)
10800 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
10802 : "0"(a
), "w"(b
), "w"(c
)
10803 : /* No clobbers */);
/* vmlsl_high_s16: SMLSL2 taking .8h sources b and c and a .4s accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10807 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10808 vmlsl_high_s16 (int32x4_t a
, int16x8_t b
, int16x8_t c
)
10811 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
10813 : "0"(a
), "w"(b
), "w"(c
)
10814 : /* No clobbers */);
/* vmlsl_high_s32: SMLSL2 taking .4s sources b and c and a .2d accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10818 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10819 vmlsl_high_s32 (int64x2_t a
, int32x4_t b
, int32x4_t c
)
10822 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
10824 : "0"(a
), "w"(b
), "w"(c
)
10825 : /* No clobbers */);
/* vmlsl_high_u8: UMLSL2 taking .16b sources b and c and a .8h accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10829 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
10830 vmlsl_high_u8 (uint16x8_t a
, uint8x16_t b
, uint8x16_t c
)
10833 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
10835 : "0"(a
), "w"(b
), "w"(c
)
10836 : /* No clobbers */);
/* vmlsl_high_u16: UMLSL2 taking .8h sources b and c and a .4s accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10840 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10841 vmlsl_high_u16 (uint32x4_t a
, uint16x8_t b
, uint16x8_t c
)
10844 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
10846 : "0"(a
), "w"(b
), "w"(c
)
10847 : /* No clobbers */);
/* vmlsl_high_u32: UMLSL2 taking .4s sources b and c and a .2d accumulator.
   a is tied to operand 0, so the instruction updates it in place.  */
10851 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
10852 vmlsl_high_u32 (uint64x2_t a
, uint32x4_t b
, uint32x4_t c
)
10855 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
10857 : "0"(a
), "w"(b
), "w"(c
)
10858 : /* No clobbers */);
/* vmlsl_lane_s16: SMLSL by element, .4h source into a .4s accumulator.
   a_ is tied to operand 0, b_ is the vector source and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10862 #define vmlsl_lane_s16(a, b, c, d) \
10865 int16x4_t c_ = (c); \
10866 int16x4_t b_ = (b); \
10867 int32x4_t a_ = (a); \
10868 int32x4_t result; \
10869 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
10871 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10872 : /* No clobbers */); \
/* vmlsl_lane_s32: SMLSL by element, .2s source into a .2d accumulator.
   a_ is tied to operand 0, b_ is the vector source and immediate d
   selects the lane of c_.  */
10876 #define vmlsl_lane_s32(a, b, c, d) \
10879 int32x2_t c_ = (c); \
10880 int32x2_t b_ = (b); \
10881 int64x2_t a_ = (a); \
10882 int64x2_t result; \
10883 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
10885 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10886 : /* No clobbers */); \
/* vmlsl_lane_u16: UMLSL by element, .4h source into a .4s accumulator.
   a_ is tied to operand 0, b_ is the vector source and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
10890 #define vmlsl_lane_u16(a, b, c, d) \
10893 uint16x4_t c_ = (c); \
10894 uint16x4_t b_ = (b); \
10895 uint32x4_t a_ = (a); \
10896 uint32x4_t result; \
10897 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
10899 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10900 : /* No clobbers */); \
/* vmlsl_lane_u32: UMLSL by element, .2s source into a .2d accumulator.
   a_ is tied to operand 0, b_ is the vector source and immediate d
   selects the lane of c_.  */
10904 #define vmlsl_lane_u32(a, b, c, d) \
10907 uint32x2_t c_ = (c); \
10908 uint32x2_t b_ = (b); \
10909 uint64x2_t a_ = (a); \
10910 uint64x2_t result; \
10911 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
10913 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10914 : /* No clobbers */); \
/* vmlsl_laneq_s16: SMLSL by element where the lane source c_ is a 128-bit
   int16x8_t; .4h source into a .4s accumulator tied to a_.  c_ uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10918 #define vmlsl_laneq_s16(a, b, c, d) \
10921 int16x8_t c_ = (c); \
10922 int16x4_t b_ = (b); \
10923 int32x4_t a_ = (a); \
10924 int32x4_t result; \
10925 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
10927 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10928 : /* No clobbers */); \
/* vmlsl_laneq_s32: SMLSL by element where the lane source c_ is a 128-bit
   int32x4_t; .2s source into a .2d accumulator tied to a_.  Immediate d
   selects the lane.  */
10932 #define vmlsl_laneq_s32(a, b, c, d) \
10935 int32x4_t c_ = (c); \
10936 int32x2_t b_ = (b); \
10937 int64x2_t a_ = (a); \
10938 int64x2_t result; \
10939 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
10941 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10942 : /* No clobbers */); \
/* vmlsl_laneq_u16: UMLSL by element where the lane source c_ is a 128-bit
   uint16x8_t; .4h source into a .4s accumulator tied to a_.  c_ uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10946 #define vmlsl_laneq_u16(a, b, c, d) \
10949 uint16x8_t c_ = (c); \
10950 uint16x4_t b_ = (b); \
10951 uint32x4_t a_ = (a); \
10952 uint32x4_t result; \
10953 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
10955 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
10956 : /* No clobbers */); \
/* vmlsl_laneq_u32: UMLSL by element where the lane source c_ is a 128-bit
   uint32x4_t; .2s source into a .2d accumulator tied to a_.  Immediate d
   selects the lane.  */
10960 #define vmlsl_laneq_u32(a, b, c, d) \
10963 uint32x4_t c_ = (c); \
10964 uint32x2_t b_ = (b); \
10965 uint64x2_t a_ = (a); \
10966 uint64x2_t result; \
10967 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
10969 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10970 : /* No clobbers */); \
/* vmlsl_n_s16: SMLSL with a .4h source and a .4s accumulator; scalar c is
   read as lane 0 of operand 3 and a is tied to operand 0.  c uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10974 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
10975 vmlsl_n_s16 (int32x4_t a
, int16x4_t b
, int16_t c
)
10978 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
10980 : "0"(a
), "w"(b
), "x"(c
)
10981 : /* No clobbers */);
/* vmlsl_n_s32: SMLSL with a .2s source and a .2d accumulator; scalar c is
   read as lane 0 of operand 3 and a is tied to operand 0.  */
10985 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
10986 vmlsl_n_s32 (int64x2_t a
, int32x2_t b
, int32_t c
)
10989 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
10991 : "0"(a
), "w"(b
), "w"(c
)
10992 : /* No clobbers */);
/* vmlsl_n_u16: UMLSL with a .4h source and a .4s accumulator; scalar c is
   read as lane 0 of operand 3 and a is tied to operand 0.  c uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
10996 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
10997 vmlsl_n_u16 (uint32x4_t a
, uint16x4_t b
, uint16_t c
)
11000 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
11002 : "0"(a
), "w"(b
), "x"(c
)
11003 : /* No clobbers */);
/* vmlsl_n_u32: UMLSL with a .2s source and a .2d accumulator; scalar c is
   read as lane 0 of operand 3 and a is tied to operand 0.  */
11007 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11008 vmlsl_n_u32 (uint64x2_t a
, uint32x2_t b
, uint32_t c
)
11011 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
11013 : "0"(a
), "w"(b
), "w"(c
)
11014 : /* No clobbers */);
/* vmlsl_s8: SMLSL taking .8b sources b and c and a .8h accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11018 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11019 vmlsl_s8 (int16x8_t a
, int8x8_t b
, int8x8_t c
)
11022 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
11024 : "0"(a
), "w"(b
), "w"(c
)
11025 : /* No clobbers */);
/* vmlsl_s16: SMLSL taking .4h sources b and c and a .4s accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11029 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11030 vmlsl_s16 (int32x4_t a
, int16x4_t b
, int16x4_t c
)
11033 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
11035 : "0"(a
), "w"(b
), "w"(c
)
11036 : /* No clobbers */);
/* vmlsl_s32: SMLSL taking .2s sources b and c and a .2d accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11040 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11041 vmlsl_s32 (int64x2_t a
, int32x2_t b
, int32x2_t c
)
11044 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
11046 : "0"(a
), "w"(b
), "w"(c
)
11047 : /* No clobbers */);
/* vmlsl_u8: UMLSL taking .8b sources b and c and a .8h accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11051 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11052 vmlsl_u8 (uint16x8_t a
, uint8x8_t b
, uint8x8_t c
)
11055 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
11057 : "0"(a
), "w"(b
), "w"(c
)
11058 : /* No clobbers */);
/* vmlsl_u16: UMLSL taking .4h sources b and c and a .4s accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11062 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11063 vmlsl_u16 (uint32x4_t a
, uint16x4_t b
, uint16x4_t c
)
11066 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
11068 : "0"(a
), "w"(b
), "w"(c
)
11069 : /* No clobbers */);
/* vmlsl_u32: UMLSL taking .2s sources b and c and a .2d accumulator.  a is
   tied to operand 0, so the instruction updates it in place.  */
11073 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11074 vmlsl_u32 (uint64x2_t a
, uint32x2_t b
, uint32x2_t c
)
11077 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
11079 : "0"(a
), "w"(b
), "w"(c
)
11080 : /* No clobbers */);
/* vmlsq_lane_f32: two-instruction sequence on .4s lanes.  FMUL writes the
   product of b_ and lane d of c_ into scratch output t1 (operand 1), then
   FSUB subtracts it from operand 0, which is tied to a_.  d must be an
   immediate ("i" constraint).  */
11084 #define vmlsq_lane_f32(a, b, c, d) \
11087 float32x4_t c_ = (c); \
11088 float32x4_t b_ = (b); \
11089 float32x4_t a_ = (a); \
11090 float32x4_t result; \
11092 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
11093 : "=w"(result), "=w"(t1) \
11094 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
11095 : /* No clobbers */); \
/* vmlsq_lane_s16: MLS by element on .8h lanes.  a_ is tied to operand 0
   (the accumulator), b_ is the vector source and immediate d selects the
   lane of c_.  c_ uses the "x" constraint because the by-element encoding
   for 16-bit lanes can only name V0-V15 as the indexed register.  */
11099 #define vmlsq_lane_s16(a, b, c, d) \
11102 int16x8_t c_ = (c); \
11103 int16x8_t b_ = (b); \
11104 int16x8_t a_ = (a); \
11105 int16x8_t result; \
11106 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
11108 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
11109 : /* No clobbers */); \
/* vmlsq_lane_s32: MLS by element on .4s lanes.  a_ is tied to operand 0
   (the accumulator), b_ is the vector source and immediate d selects the
   lane of c_.  */
11113 #define vmlsq_lane_s32(a, b, c, d) \
11116 int32x4_t c_ = (c); \
11117 int32x4_t b_ = (b); \
11118 int32x4_t a_ = (a); \
11119 int32x4_t result; \
11120 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
11122 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
11123 : /* No clobbers */); \
/* vmlsq_lane_u16: MLS by element on .8h lanes, unsigned-typed.  a_ is tied
   to operand 0 (the accumulator), b_ is the vector source and immediate d
   selects the lane of c_.  c_ uses the "x" constraint because the
   by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
11127 #define vmlsq_lane_u16(a, b, c, d) \
11130 uint16x8_t c_ = (c); \
11131 uint16x8_t b_ = (b); \
11132 uint16x8_t a_ = (a); \
11133 uint16x8_t result; \
11134 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
11136 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
11137 : /* No clobbers */); \
/* vmlsq_lane_u32: MLS by element on .4s lanes, unsigned-typed.  a_ is tied
   to operand 0 (the accumulator), b_ is the vector source and immediate d
   selects the lane of c_.  */
11141 #define vmlsq_lane_u32(a, b, c, d) \
11144 uint32x4_t c_ = (c); \
11145 uint32x4_t b_ = (b); \
11146 uint32x4_t a_ = (a); \
11147 uint32x4_t result; \
11148 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
11150 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
11151 : /* No clobbers */); \
/* vmlsq_laneq_f32: two-instruction sequence on .4s lanes.  FMUL writes the
   product of __b_ and lane __d of __c_ into the scratch vector __t1
   (operand 1); FSUB then subtracts it from operand 0, which is tied to
   __a_.  __d must be an immediate ("i" constraint).  */
11155 #define vmlsq_laneq_f32(__a, __b, __c, __d) \
11158 float32x4_t __c_ = (__c); \
11159 float32x4_t __b_ = (__b); \
11160 float32x4_t __a_ = (__a); \
11161 float32x4_t __result; \
11162 float32x4_t __t1; \
11163 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
11164 : "=w"(__result), "=w"(__t1) \
11165 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
11166 : /* No clobbers */); \
/* vmlsq_laneq_s16: MLS by element on .8h lanes.  __a_ is tied to operand 0
   (the accumulator), __b_ is the vector source and immediate __d selects
   the lane of __c_.  __c_ uses the "x" constraint because the by-element
   encoding for 16-bit lanes can only name V0-V15 as the indexed
   register.  */
11170 #define vmlsq_laneq_s16(__a, __b, __c, __d) \
11173 int16x8_t __c_ = (__c); \
11174 int16x8_t __b_ = (__b); \
11175 int16x8_t __a_ = (__a); \
11176 int16x8_t __result; \
11177 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
11179 : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
11180 : /* No clobbers */); \
/* vmlsq_laneq_s32: MLS by element on .4s lanes.  __a_ is tied to operand 0
   (the accumulator), __b_ is the vector source and immediate __d selects
   the lane of __c_.  */
11184 #define vmlsq_laneq_s32(__a, __b, __c, __d) \
11187 int32x4_t __c_ = (__c); \
11188 int32x4_t __b_ = (__b); \
11189 int32x4_t __a_ = (__a); \
11190 int32x4_t __result; \
11191 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
11193 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
11194 : /* No clobbers */); \
/* vmlsq_laneq_u16: MLS by element on .8h lanes, unsigned-typed.  __a_ is
   tied to operand 0 (the accumulator), __b_ is the vector source and
   immediate __d selects the lane of __c_.  __c_ uses the "x" constraint
   because the by-element encoding for 16-bit lanes can only name V0-V15
   as the indexed register.  */
11198 #define vmlsq_laneq_u16(__a, __b, __c, __d) \
11201 uint16x8_t __c_ = (__c); \
11202 uint16x8_t __b_ = (__b); \
11203 uint16x8_t __a_ = (__a); \
11204 uint16x8_t __result; \
11205 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
11207 : "0"(__a_), "w"(__b_), "x"(__c_), "i"(__d) \
11208 : /* No clobbers */); \
/* vmlsq_laneq_u32: MLS by element on .4s lanes, unsigned-typed.  __a_ is
   tied to operand 0 (the accumulator), __b_ is the vector source and
   immediate __d selects the lane of __c_.  */
11212 #define vmlsq_laneq_u32(__a, __b, __c, __d) \
11215 uint32x4_t __c_ = (__c); \
11216 uint32x4_t __b_ = (__b); \
11217 uint32x4_t __a_ = (__a); \
11218 uint32x4_t __result; \
11219 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
11221 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
11222 : /* No clobbers */); \
/* vmlsq_n_f32: two-instruction sequence on .4s lanes.  FMUL writes b times
   lane 0 of scalar c into scratch output t1 (operand 1); FSUB then
   subtracts that from operand 0, which is tied to a and returned through
   result.  */
11226 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11227 vmlsq_n_f32 (float32x4_t a
, float32x4_t b
, float32_t c
)
11229 float32x4_t result
;
11231 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
11232 : "=w"(result
), "=w"(t1
)
11233 : "0"(a
), "w"(b
), "w"(c
)
11234 : /* No clobbers */);
/* vmlsq_n_f64: two-instruction sequence on .2d lanes.  FMUL writes b times
   lane 0 of scalar c into scratch output t1 (operand 1); FSUB then
   subtracts that from operand 0, which is tied to a and returned through
   result.  */
11238 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11239 vmlsq_n_f64 (float64x2_t a
, float64x2_t b
, float64_t c
)
11241 float64x2_t result
;
11243 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
11244 : "=w"(result
), "=w"(t1
)
11245 : "0"(a
), "w"(b
), "w"(c
)
11246 : /* No clobbers */);
/* vmlsq_n_s16: MLS on .8h lanes with scalar c read as lane 0 of operand 3.
   a is tied to operand 0 (the accumulator).  c uses the "x" constraint
   because the by-element encoding for 16-bit lanes can only name V0-V15
   as the indexed register.  */
11250 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11251 vmlsq_n_s16 (int16x8_t a
, int16x8_t b
, int16_t c
)
11254 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11256 : "0"(a
), "w"(b
), "x"(c
)
11257 : /* No clobbers */);
/* vmlsq_n_s32: MLS on .4s lanes with scalar c read as lane 0 of operand 3.
   a is tied to operand 0 (the accumulator); b is the vector source.  */
11261 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11262 vmlsq_n_s32 (int32x4_t a
, int32x4_t b
, int32_t c
)
11265 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11267 : "0"(a
), "w"(b
), "w"(c
)
11268 : /* No clobbers */);
/* vmlsq_n_u16: unsigned-typed MLS on .8h lanes with scalar c read as lane
   0 of operand 3.  a is tied to operand 0 (the accumulator).  c uses the
   "x" constraint because the by-element encoding for 16-bit lanes can
   only name V0-V15 as the indexed register.  */
11272 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11273 vmlsq_n_u16 (uint16x8_t a
, uint16x8_t b
, uint16_t c
)
11276 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11278 : "0"(a
), "w"(b
), "x"(c
)
11279 : /* No clobbers */);
/* vmlsq_n_u32: unsigned-typed MLS on .4s lanes with scalar c read as lane
   0 of operand 3.  a is tied to operand 0 (the accumulator).  */
11283 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11284 vmlsq_n_u32 (uint32x4_t a
, uint32x4_t b
, uint32_t c
)
11287 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11289 : "0"(a
), "w"(b
), "w"(c
)
11290 : /* No clobbers */);
/* vmlsq_s8: element-wise MLS on .16b lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
11294 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11295 vmlsq_s8 (int8x16_t a
, int8x16_t b
, int8x16_t c
)
11298 __asm__ ("mls %0.16b,%2.16b,%3.16b"
11300 : "0"(a
), "w"(b
), "w"(c
)
11301 : /* No clobbers */);
/* vmlsq_s16: element-wise MLS on .8h lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
11305 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11306 vmlsq_s16 (int16x8_t a
, int16x8_t b
, int16x8_t c
)
11309 __asm__ ("mls %0.8h,%2.8h,%3.8h"
11311 : "0"(a
), "w"(b
), "w"(c
)
11312 : /* No clobbers */);
/* vmlsq_s32: element-wise MLS on .4s lanes.  a is tied to operand 0 (the
   accumulator); b and c are the two vector sources.  */
11316 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11317 vmlsq_s32 (int32x4_t a
, int32x4_t b
, int32x4_t c
)
11320 __asm__ ("mls %0.4s,%2.4s,%3.4s"
11322 : "0"(a
), "w"(b
), "w"(c
)
11323 : /* No clobbers */);
/* vmlsq_u8: unsigned-typed wrapper around the .16b MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
11327 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11328 vmlsq_u8 (uint8x16_t a
, uint8x16_t b
, uint8x16_t c
)
11331 __asm__ ("mls %0.16b,%2.16b,%3.16b"
11333 : "0"(a
), "w"(b
), "w"(c
)
11334 : /* No clobbers */);
/* vmlsq_u16: unsigned-typed wrapper around the .8h MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
11338 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11339 vmlsq_u16 (uint16x8_t a
, uint16x8_t b
, uint16x8_t c
)
11342 __asm__ ("mls %0.8h,%2.8h,%3.8h"
11344 : "0"(a
), "w"(b
), "w"(c
)
11345 : /* No clobbers */);
/* vmlsq_u32: unsigned-typed wrapper around the .4s MLS instruction.  a is
   tied to operand 0 (the accumulator); b and c are the sources.  */
11349 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11350 vmlsq_u32 (uint32x4_t a
, uint32x4_t b
, uint32x4_t c
)
11353 __asm__ ("mls %0.4s,%2.4s,%3.4s"
11355 : "0"(a
), "w"(b
), "w"(c
)
11356 : /* No clobbers */);
/* vmov_n_f32: fills both .2s lanes of the result with a by issuing DUP.
   The template reads operand 1 through the %w modifier, i.e. as a 32-bit
   general-purpose register.  */
11360 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
11361 vmov_n_f32 (float32_t a
)
11363 float32x2_t result
;
11364 __asm__ ("dup %0.2s, %w1"
11367 : /* No clobbers */);
/* vmov_n_p8: DUP from a 32-bit general register (%w1) into all eight .8b
   lanes.  The parameter is declared uint32_t rather than an 8-bit type.  */
11371 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
11372 vmov_n_p8 (uint32_t a
)
11375 __asm__ ("dup %0.8b,%w1"
11378 : /* No clobbers */);
/* vmov_n_p16: DUP from a 32-bit general register (%w1) into all four .4h
   lanes.  The parameter is declared uint32_t rather than a 16-bit type.  */
11382 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
11383 vmov_n_p16 (uint32_t a
)
11386 __asm__ ("dup %0.4h,%w1"
11389 : /* No clobbers */);
/* vmov_n_s8: DUP from a 32-bit general register (%w1) into all eight .8b
   lanes.  The parameter is declared int32_t rather than an 8-bit type.  */
11393 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11394 vmov_n_s8 (int32_t a
)
11397 __asm__ ("dup %0.8b,%w1"
11400 : /* No clobbers */);
/* vmov_n_s16: DUP from a 32-bit general register (%w1) into all four .4h
   lanes.  The parameter is declared int32_t rather than a 16-bit type.  */
11404 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11405 vmov_n_s16 (int32_t a
)
11408 __asm__ ("dup %0.4h,%w1"
11411 : /* No clobbers */);
/* vmov_n_s32: DUP from a 32-bit general register (%w1) into both .2s
   lanes of the result.  */
11415 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11416 vmov_n_s32 (int32_t a
)
11419 __asm__ ("dup %0.2s,%w1"
11422 : /* No clobbers */);
/* vmov_n_s64: uses INS to move a 64-bit general register (%x1) into lane
   d[0] of the result register, rather than DUP as the narrower variants
   do.  */
11426 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
11427 vmov_n_s64 (int64_t a
)
11430 __asm__ ("ins %0.d[0],%x1"
11433 : /* No clobbers */);
/* vmov_n_u8: DUP from a 32-bit general register (%w1) into all eight .8b
   lanes.  The parameter is declared uint32_t rather than an 8-bit type.  */
11437 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11438 vmov_n_u8 (uint32_t a
)
11441 __asm__ ("dup %0.8b,%w1"
11444 : /* No clobbers */);
/* vmov_n_u16: DUP from a 32-bit general register (%w1) into all four .4h
   lanes.  The parameter is declared uint32_t rather than a 16-bit type.  */
11448 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11449 vmov_n_u16 (uint32_t a
)
11452 __asm__ ("dup %0.4h,%w1"
11455 : /* No clobbers */);
/* vmov_n_u32: DUP from a 32-bit general register (%w1) into both .2s
   lanes of the result.  */
11459 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11460 vmov_n_u32 (uint32_t a
)
11463 __asm__ ("dup %0.2s,%w1"
11466 : /* No clobbers */);
/* vmov_n_u64: uses INS to move a 64-bit general register (%x1) into lane
   d[0] of the result register, rather than DUP as the narrower variants
   do.  */
11470 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
11471 vmov_n_u64 (uint64_t a
)
11474 __asm__ ("ins %0.d[0],%x1"
11477 : /* No clobbers */);
/* vmovl_high_s8: SSHLL2 with a shift amount of #0, taking a .16b source
   and producing .8h lanes, so lanes are widened without being shifted.  */
11481 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11482 vmovl_high_s8 (int8x16_t a
)
11485 __asm__ ("sshll2 %0.8h,%1.16b,#0"
11488 : /* No clobbers */);
/* vmovl_high_s16: SSHLL2 with a shift amount of #0, taking a .8h source
   and producing .4s lanes, so lanes are widened without being shifted.  */
11492 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11493 vmovl_high_s16 (int16x8_t a
)
11496 __asm__ ("sshll2 %0.4s,%1.8h,#0"
11499 : /* No clobbers */);
/* vmovl_high_s32: SSHLL2 with a shift amount of #0, taking a .4s source
   and producing .2d lanes, so lanes are widened without being shifted.  */
11503 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11504 vmovl_high_s32 (int32x4_t a
)
11507 __asm__ ("sshll2 %0.2d,%1.4s,#0"
11510 : /* No clobbers */);
/* vmovl_high_u8: USHLL2 with a shift amount of #0, taking a .16b source
   and producing .8h lanes, so lanes are widened without being shifted.  */
11514 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11515 vmovl_high_u8 (uint8x16_t a
)
11518 __asm__ ("ushll2 %0.8h,%1.16b,#0"
11521 : /* No clobbers */);
/* vmovl_high_u16: USHLL2 with a shift amount of #0, taking a .8h source
   and producing .4s lanes, so lanes are widened without being shifted.  */
11525 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11526 vmovl_high_u16 (uint16x8_t a
)
11529 __asm__ ("ushll2 %0.4s,%1.8h,#0"
11532 : /* No clobbers */);
/* vmovl_high_u32: USHLL2 with a shift amount of #0, taking a .4s source
   and producing .2d lanes, so lanes are widened without being shifted.  */
11536 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11537 vmovl_high_u32 (uint32x4_t a
)
11540 __asm__ ("ushll2 %0.2d,%1.4s,#0"
11543 : /* No clobbers */);
/* vmovl_s8: SSHLL with a shift amount of #0, widening the eight .8b lanes
   of a into .8h lanes without shifting them.  */
11547 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11548 vmovl_s8 (int8x8_t a
)
11551 __asm__ ("sshll %0.8h,%1.8b,#0"
11554 : /* No clobbers */);
/* vmovl_s16: SSHLL with a shift amount of #0, widening the four .4h lanes
   of a into .4s lanes without shifting them.  */
11558 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11559 vmovl_s16 (int16x4_t a
)
11562 __asm__ ("sshll %0.4s,%1.4h,#0"
11565 : /* No clobbers */);
/* vmovl_s32: SSHLL with a shift amount of #0, widening the two .2s lanes
   of a into .2d lanes without shifting them.  */
11569 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11570 vmovl_s32 (int32x2_t a
)
11573 __asm__ ("sshll %0.2d,%1.2s,#0"
11576 : /* No clobbers */);
/* vmovl_u8: USHLL with a shift amount of #0, widening the eight .8b lanes
   of a into .8h lanes without shifting them.  */
11580 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11581 vmovl_u8 (uint8x8_t a
)
11584 __asm__ ("ushll %0.8h,%1.8b,#0"
11587 : /* No clobbers */);
/* vmovl_u16: USHLL with a shift amount of #0, widening the four .4h lanes
   of a into .4s lanes without shifting them.  */
11591 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11592 vmovl_u16 (uint16x4_t a
)
11595 __asm__ ("ushll %0.4s,%1.4h,#0"
11598 : /* No clobbers */);
/* vmovl_u32: USHLL with a shift amount of #0, widening the two .2s lanes
   of a into .2d lanes without shifting them.  */
11602 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11603 vmovl_u32 (uint32x2_t a
)
11606 __asm__ ("ushll %0.2d,%1.2s,#0"
11609 : /* No clobbers */);
/* vmovn_high_s16: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .8h lanes of b into
   that .16b register.  */
11613 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11614 vmovn_high_s16 (int8x8_t a
, int16x8_t b
)
11616 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
11617 __asm__ ("xtn2 %0.16b,%1.8h"
11620 : /* No clobbers */);
/* vmovn_high_s32: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .4s lanes of b into
   that .8h register.  */
11624 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11625 vmovn_high_s32 (int16x4_t a
, int32x4_t b
)
11627 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
11628 __asm__ ("xtn2 %0.8h,%1.4s"
11631 : /* No clobbers */);
/* vmovn_high_s64: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .2d lanes of b into
   that .4s register.  */
11635 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11636 vmovn_high_s64 (int32x2_t a
, int64x2_t b
)
11638 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
11639 __asm__ ("xtn2 %0.4s,%1.2d"
11642 : /* No clobbers */);
/* vmovn_high_u16: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .8h lanes of b into
   that .16b register.  */
11646 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11647 vmovn_high_u16 (uint8x8_t a
, uint16x8_t b
)
11649 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
11650 __asm__ ("xtn2 %0.16b,%1.8h"
11653 : /* No clobbers */);
/* vmovn_high_u32: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .4s lanes of b into
   that .8h register.  */
11657 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11658 vmovn_high_u32 (uint16x4_t a
, uint32x4_t b
)
11660 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
11661 __asm__ ("xtn2 %0.8h,%1.4s"
11664 : /* No clobbers */);
/* vmovn_high_u64: result is first built by combining a with a zero vector
   created from UINT64_C (0x0); XTN2 then narrows the .2d lanes of b into
   that .4s register.  */
11668 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11669 vmovn_high_u64 (uint32x2_t a
, uint64x2_t b
)
11671 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
11672 __asm__ ("xtn2 %0.4s,%1.2d"
11675 : /* No clobbers */);
/* vmovn_s16: XTN narrowing the eight .8h lanes of a to .8b lanes.  */
11679 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
11680 vmovn_s16 (int16x8_t a
)
11683 __asm__ ("xtn %0.8b,%1.8h"
11686 : /* No clobbers */);
/* vmovn_s32: XTN narrowing the four .4s lanes of a to .4h lanes.  */
11690 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
11691 vmovn_s32 (int32x4_t a
)
11694 __asm__ ("xtn %0.4h,%1.4s"
11697 : /* No clobbers */);
/* vmovn_s64: XTN narrowing the two .2d lanes of a to .2s lanes.  */
11701 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
11702 vmovn_s64 (int64x2_t a
)
11705 __asm__ ("xtn %0.2s,%1.2d"
11708 : /* No clobbers */);
/* vmovn_u16: unsigned-typed XTN narrowing the eight .8h lanes of a to .8b
   lanes.  */
11712 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
11713 vmovn_u16 (uint16x8_t a
)
11716 __asm__ ("xtn %0.8b,%1.8h"
11719 : /* No clobbers */);
/* vmovn_u32: unsigned-typed XTN narrowing the four .4s lanes of a to .4h
   lanes.  */
11723 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
11724 vmovn_u32 (uint32x4_t a
)
11727 __asm__ ("xtn %0.4h,%1.4s"
11730 : /* No clobbers */);
/* vmovn_u64: unsigned-typed XTN narrowing the two .2d lanes of a to .2s
   lanes.  */
11734 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
11735 vmovn_u64 (uint64x2_t a
)
11738 __asm__ ("xtn %0.2s,%1.2d"
11741 : /* No clobbers */);
/* vmovq_n_f32: fills all four .4s lanes of the result with a by issuing
   DUP.  The template reads operand 1 through the %w modifier, i.e. as a
   32-bit general-purpose register.  */
11745 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
11746 vmovq_n_f32 (float32_t a
)
11748 float32x4_t result
;
11749 __asm__ ("dup %0.4s, %w1"
11752 : /* No clobbers */);
/* vmovq_n_f64: unlike its siblings this uses no inline asm; it returns a
   float64x2_t vector literal whose two lanes are both a.  */
11756 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
11757 vmovq_n_f64 (float64_t a
)
11759 return (float64x2_t
) {a
, a
};
/* vmovq_n_p8: DUP from a 32-bit general register (%w1) into all sixteen
   .16b lanes.  The parameter is declared uint32_t rather than an 8-bit
   type.  */
11762 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
11763 vmovq_n_p8 (uint32_t a
)
11766 __asm__ ("dup %0.16b,%w1"
11769 : /* No clobbers */);
/* vmovq_n_p16: DUP from a 32-bit general register (%w1) into all eight
   .8h lanes.  The parameter is declared uint32_t rather than a 16-bit
   type.  */
11773 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
11774 vmovq_n_p16 (uint32_t a
)
11777 __asm__ ("dup %0.8h,%w1"
11780 : /* No clobbers */);
/* vmovq_n_s8: DUP from a 32-bit general register (%w1) into all sixteen
   .16b lanes.  The parameter is declared int32_t rather than an 8-bit
   type.  */
11784 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
11785 vmovq_n_s8 (int32_t a
)
11788 __asm__ ("dup %0.16b,%w1"
11791 : /* No clobbers */);
/* vmovq_n_s16: DUP from a 32-bit general register (%w1) into all eight
   .8h lanes.  The parameter is declared int32_t rather than a 16-bit
   type.  */
11795 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
11796 vmovq_n_s16 (int32_t a
)
11799 __asm__ ("dup %0.8h,%w1"
11802 : /* No clobbers */);
/* vmovq_n_s32: DUP from a 32-bit general register (%w1) into all four .4s
   lanes of the result.  */
11806 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
11807 vmovq_n_s32 (int32_t a
)
11810 __asm__ ("dup %0.4s,%w1"
11813 : /* No clobbers */);
/* vmovq_n_s64: DUP from a 64-bit general register (%x1) into both .2d
   lanes of the result.  */
11817 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
11818 vmovq_n_s64 (int64_t a
)
11821 __asm__ ("dup %0.2d,%x1"
11824 : /* No clobbers */);
/* vmovq_n_u8: DUP from a 32-bit general register (%w1) into all sixteen
   .16b lanes.  The parameter is declared uint32_t rather than an 8-bit
   type.  */
11828 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
11829 vmovq_n_u8 (uint32_t a
)
11832 __asm__ ("dup %0.16b,%w1"
11835 : /* No clobbers */);
/* vmovq_n_u16: DUP from a 32-bit general register (%w1) into all eight
   .8h lanes.  The parameter is declared uint32_t rather than a 16-bit
   type.  */
11839 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
11840 vmovq_n_u16 (uint32_t a
)
11843 __asm__ ("dup %0.8h,%w1"
11846 : /* No clobbers */);
/* vmovq_n_u32: DUP from a 32-bit general register (%w1) into all four .4s
   lanes of the result.  */
11850 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
11851 vmovq_n_u32 (uint32_t a
)
11854 __asm__ ("dup %0.4s,%w1"
11857 : /* No clobbers */);
/* vmovq_n_u64: DUP from a 64-bit general register (%x1) into both .2d
   lanes of the result.  */
11861 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
11862 vmovq_n_u64 (uint64_t a
)
11865 __asm__ ("dup %0.2d,%x1"
11868 : /* No clobbers */);
/* vmul_lane_f32: FMUL by element on .2s lanes.  a_ is the vector source
   and immediate c ("i" constraint) selects the lane of b_ that every lane
   of a_ is multiplied by.  */
11872 #define vmul_lane_f32(a, b, c) \
11875 float32x2_t b_ = (b); \
11876 float32x2_t a_ = (a); \
11877 float32x2_t result; \
11878 __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \
11880 : "w"(a_), "w"(b_), "i"(c) \
11881 : /* No clobbers */); \
/* vmul_lane_s16: MUL by element on .4h lanes.  a_ is the vector source and
   immediate c selects the lane of b_.  b_ uses the "x" constraint because
   the by-element encoding for 16-bit lanes can only name V0-V15 as the
   indexed register.  */
11885 #define vmul_lane_s16(a, b, c) \
11888 int16x4_t b_ = (b); \
11889 int16x4_t a_ = (a); \
11890 int16x4_t result; \
11891 __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
11893 : "w"(a_), "x"(b_), "i"(c) \
11894 : /* No clobbers */); \
/* vmul_lane_s32: MUL by element on .2s lanes.  a_ is the vector source and
   immediate c ("i" constraint) selects the lane of b_.  */
11898 #define vmul_lane_s32(a, b, c) \
11901 int32x2_t b_ = (b); \
11902 int32x2_t a_ = (a); \
11903 int32x2_t result; \
11904 __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \
11906 : "w"(a_), "w"(b_), "i"(c) \
11907 : /* No clobbers */); \
/* vmul_lane_u16: MUL by element on .4h lanes, unsigned-typed.  a_ is the
   vector source and immediate c selects the lane of b_.  b_ uses the "x"
   constraint because the by-element encoding for 16-bit lanes can only
   name V0-V15 as the indexed register.  */
11911 #define vmul_lane_u16(a, b, c) \
11914 uint16x4_t b_ = (b); \
11915 uint16x4_t a_ = (a); \
11916 uint16x4_t result; \
11917 __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
11919 : "w"(a_), "x"(b_), "i"(c) \
11920 : /* No clobbers */); \
/* vmul_lane_u32: MUL by element on .2s lanes, unsigned-typed.  a_ is the
   vector source and immediate c ("i" constraint) selects the lane of b_.  */
11924 #define vmul_lane_u32(a, b, c) \
11927 uint32x2_t b_ = (b); \
11928 uint32x2_t a_ = (a); \
11929 uint32x2_t result; \
11930 __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
11932 : "w"(a_), "w"(b_), "i"(c) \
11933 : /* No clobbers */); \
/* vmul_laneq_f32: FMUL by element on .2s lanes where the lane source b_ is
   a 128-bit float32x4_t.  Immediate c ("i" constraint) selects the lane.  */
11937 #define vmul_laneq_f32(a, b, c) \
11940 float32x4_t b_ = (b); \
11941 float32x2_t a_ = (a); \
11942 float32x2_t result; \
11943 __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \
11945 : "w"(a_), "w"(b_), "i"(c) \
11946 : /* No clobbers */); \
/* vmul_laneq_s16: MUL by element on .4h lanes where the lane source b_ is
   a 128-bit int16x8_t; immediate c selects the lane.  b_ uses the "x"
   constraint because the by-element encoding for 16-bit lanes can only
   name V0-V15 as the indexed register.  */
11950 #define vmul_laneq_s16(a, b, c) \
11953 int16x8_t b_ = (b); \
11954 int16x4_t a_ = (a); \
11955 int16x4_t result; \
11956 __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
11958 : "w"(a_), "x"(b_), "i"(c) \
11959 : /* No clobbers */); \
/* vmul_laneq_s32: MUL by element on .2s lanes where the lane source b_ is
   a 128-bit int32x4_t.  Immediate c ("i" constraint) selects the lane.  */
11963 #define vmul_laneq_s32(a, b, c) \
11966 int32x4_t b_ = (b); \
11967 int32x2_t a_ = (a); \
11968 int32x2_t result; \
11969 __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
11971 : "w"(a_), "w"(b_), "i"(c) \
11972 : /* No clobbers */); \
/* vmul_laneq_u16: MUL by element on .4h lanes where the lane source b_ is
   a 128-bit uint16x8_t; immediate c selects the lane.  b_ uses the "x"
   constraint because the by-element encoding for 16-bit lanes can only
   name V0-V15 as the indexed register.  */
11976 #define vmul_laneq_u16(a, b, c) \
11979 uint16x8_t b_ = (b); \
11980 uint16x4_t a_ = (a); \
11981 uint16x4_t result; \
11982 __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
11984 : "w"(a_), "x"(b_), "i"(c) \
11985 : /* No clobbers */); \
/* vmul_laneq_u32: MUL by element on .2s lanes where the lane source b_ is
   a 128-bit uint32x4_t.  Immediate c ("i" constraint) selects the lane.  */
11989 #define vmul_laneq_u32(a, b, c) \
11992 uint32x4_t b_ = (b); \
11993 uint32x2_t a_ = (a); \
11994 uint32x2_t result; \
11995 __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
11997 : "w"(a_), "w"(b_), "i"(c) \
11998 : /* No clobbers */); \
/* vmul_n_f32: FMUL by element on .2s lanes; the template multiplies
   operand 1 by lane 0 of operand 2 (%2.s[0]) into operand 0.  */
12002 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
12003 vmul_n_f32 (float32x2_t a
, float32_t b
)
12005 float32x2_t result
;
12006 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
12009 : /* No clobbers */);
/* vmul_n_s16: MUL by element on .4h lanes; the template multiplies operand
   1 by lane 0 of operand 2 (%2.h[0]) into operand 0.
   NOTE(review): 16-bit by-element forms can only encode V0-V15 as the
   indexed register -- confirm operand 2 is constrained with "x", not "w".  */
12013 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12014 vmul_n_s16 (int16x4_t a
, int16_t b
)
12017 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
12020 : /* No clobbers */);
/* vmul_n_s32: MUL by element on .2s lanes; the template multiplies operand
   1 by lane 0 of operand 2 (%2.s[0]) into operand 0.  */
12024 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
12025 vmul_n_s32 (int32x2_t a
, int32_t b
)
12028 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
12031 : /* No clobbers */);
/* vmul_n_u16: unsigned-typed MUL by element on .4h lanes; the template
   multiplies operand 1 by lane 0 of operand 2 (%2.h[0]) into operand 0.
   NOTE(review): 16-bit by-element forms can only encode V0-V15 as the
   indexed register -- confirm operand 2 is constrained with "x", not "w".  */
12035 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
12036 vmul_n_u16 (uint16x4_t a
, uint16_t b
)
12039 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
12042 : /* No clobbers */);
/* vmul_n_u32: MUL by element -- both uint32 lanes of A times the scalar B,
   read by the template as lane 0 of its register (%2.s[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12046 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
12047 vmul_n_u32 (uint32x2_t a
, uint32_t b
)
12050 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
12053 : /* No clobbers */);
/* vmuld_lane_f64 (a, b, c): scalar FMUL by element -- the double A times
   lane C of B.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the two-lane float64x2_t although the name is _lane,
   not _laneq -- confirm the intended argument type against ACLE.  */
12057 #define vmuld_lane_f64(a, b, c) \
12060 float64x2_t b_ = (b); \
12061 float64_t a_ = (a); \
12062 float64_t result; \
12063 __asm__ ("fmul %d0,%d1,%2.d[%3]" \
12065 : "w"(a_), "w"(b_), "i"(c) \
12066 : /* No clobbers */); \
/* vmull_high_lane_s16 (a, b, c): SMULL2 by element -- signed widening
   multiply of the upper four int16 lanes of A by lane C of B, giving
   int32x4_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the 128-bit int16x8_t although the name is _lane, not
   _laneq (vmull_lane_s16 takes int16x4_t) -- confirm against ACLE.
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12070 #define vmull_high_lane_s16(a, b, c) \
12073 int16x8_t b_ = (b); \
12074 int16x8_t a_ = (a); \
12075 int32x4_t result; \
12076 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
12078 : "w"(a_), "w"(b_), "i"(c) \
12079 : /* No clobbers */); \
/* vmull_high_lane_s32 (a, b, c): SMULL2 by element -- signed widening
   multiply of the upper two int32 lanes of A by lane C of B, giving
   int64x2_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the 128-bit int32x4_t although the name is _lane, not
   _laneq (vmull_lane_s32 takes int32x2_t) -- confirm against ACLE.  */
12083 #define vmull_high_lane_s32(a, b, c) \
12086 int32x4_t b_ = (b); \
12087 int32x4_t a_ = (a); \
12088 int64x2_t result; \
12089 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
12091 : "w"(a_), "w"(b_), "i"(c) \
12092 : /* No clobbers */); \
/* vmull_high_lane_u16 (a, b, c): UMULL2 by element -- unsigned widening
   multiply of the upper four uint16 lanes of A by lane C of B, giving
   uint32x4_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the 128-bit uint16x8_t although the name is _lane, not
   _laneq (vmull_lane_u16 takes uint16x4_t) -- confirm against ACLE.
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12096 #define vmull_high_lane_u16(a, b, c) \
12099 uint16x8_t b_ = (b); \
12100 uint16x8_t a_ = (a); \
12101 uint32x4_t result; \
12102 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
12104 : "w"(a_), "w"(b_), "i"(c) \
12105 : /* No clobbers */); \
/* vmull_high_lane_u32 (a, b, c): UMULL2 by element -- unsigned widening
   multiply of the upper two uint32 lanes of A by lane C of B, giving
   uint64x2_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the 128-bit uint32x4_t although the name is _lane, not
   _laneq (vmull_lane_u32 takes uint32x2_t) -- confirm against ACLE.  */
12109 #define vmull_high_lane_u32(a, b, c) \
12112 uint32x4_t b_ = (b); \
12113 uint32x4_t a_ = (a); \
12114 uint64x2_t result; \
12115 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
12117 : "w"(a_), "w"(b_), "i"(c) \
12118 : /* No clobbers */); \
/* vmull_high_laneq_s16 (a, b, c): SMULL2 by element -- signed widening
   multiply of the upper four int16 lanes of A by lane C of the int16x8_t B,
   giving int32x4_t.  C is bound with "i" (compile-time constant).  (Output
   operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12122 #define vmull_high_laneq_s16(a, b, c) \
12125 int16x8_t b_ = (b); \
12126 int16x8_t a_ = (a); \
12127 int32x4_t result; \
12128 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
12130 : "w"(a_), "w"(b_), "i"(c) \
12131 : /* No clobbers */); \
/* vmull_high_laneq_s32 (a, b, c): SMULL2 by element -- signed widening
   multiply of the upper two int32 lanes of A by lane C of the int32x4_t B,
   giving int64x2_t.  C is bound with "i" (compile-time constant).  (Output
   operand and macro tail are not visible in this excerpt.)  */
12135 #define vmull_high_laneq_s32(a, b, c) \
12138 int32x4_t b_ = (b); \
12139 int32x4_t a_ = (a); \
12140 int64x2_t result; \
12141 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
12143 : "w"(a_), "w"(b_), "i"(c) \
12144 : /* No clobbers */); \
/* vmull_high_laneq_u16 (a, b, c): UMULL2 by element -- unsigned widening
   multiply of the upper four uint16 lanes of A by lane C of the uint16x8_t B,
   giving uint32x4_t.  C is bound with "i" (compile-time constant).  (Output
   operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12148 #define vmull_high_laneq_u16(a, b, c) \
12151 uint16x8_t b_ = (b); \
12152 uint16x8_t a_ = (a); \
12153 uint32x4_t result; \
12154 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
12156 : "w"(a_), "w"(b_), "i"(c) \
12157 : /* No clobbers */); \
/* vmull_high_laneq_u32 (a, b, c): UMULL2 by element -- unsigned widening
   multiply of the upper two uint32 lanes of A by lane C of the uint32x4_t B,
   giving uint64x2_t.  C is bound with "i" (compile-time constant).  (Output
   operand and macro tail are not visible in this excerpt.)  */
12161 #define vmull_high_laneq_u32(a, b, c) \
12164 uint32x4_t b_ = (b); \
12165 uint32x4_t a_ = (a); \
12166 uint64x2_t result; \
12167 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
12169 : "w"(a_), "w"(b_), "i"(c) \
12170 : /* No clobbers */); \
/* vmull_high_n_s16: SMULL2 by element -- signed widening multiply of the
   upper four int16 lanes of A by the scalar B (read as %2.h[0]), returning
   int32x4_t.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
12174 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12175 vmull_high_n_s16 (int16x8_t a
, int16_t b
)
12178 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
12181 : /* No clobbers */);
/* vmull_high_n_s32: SMULL2 by element -- signed widening multiply of the
   upper two int32 lanes of A by the scalar B (read as %2.s[0]), returning
   int64x2_t.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
12185 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
12186 vmull_high_n_s32 (int32x4_t a
, int32_t b
)
12189 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
12192 : /* No clobbers */);
/* vmull_high_n_u16: UMULL2 by element -- unsigned widening multiply of the
   upper four uint16 lanes of A by the scalar B (read as %2.h[0]), returning
   uint32x4_t.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
12196 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12197 vmull_high_n_u16 (uint16x8_t a
, uint16_t b
)
12200 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
12203 : /* No clobbers */);
/* vmull_high_n_u32: UMULL2 by element -- unsigned widening multiply of the
   upper two uint32 lanes of A by the scalar B (read as %2.s[0]), returning
   uint64x2_t.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
12207 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
12208 vmull_high_n_u32 (uint32x4_t a
, uint32_t b
)
12211 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
12214 : /* No clobbers */);
/* vmull_high_p8: PMULL2 -- polynomial widening multiply of the upper eight
   8-bit lanes of A and B, returning poly16x8_t.  (Body braces, asm operand
   lists and return are not visible in this excerpt.)  */
12218 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
12219 vmull_high_p8 (poly8x16_t a
, poly8x16_t b
)
12222 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
12225 : /* No clobbers */);
/* vmull_high_s8: SMULL2 -- signed widening multiply of the upper eight int8
   lanes of A and B, returning int16x8_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
12229 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12230 vmull_high_s8 (int8x16_t a
, int8x16_t b
)
12233 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
12236 : /* No clobbers */);
/* vmull_high_s16: SMULL2 -- signed widening multiply of the upper four int16
   lanes of A and B, returning int32x4_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
12240 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12241 vmull_high_s16 (int16x8_t a
, int16x8_t b
)
12244 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
12247 : /* No clobbers */);
/* vmull_high_s32: SMULL2 -- signed widening multiply of the upper two int32
   lanes of A and B, returning int64x2_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
12251 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
12252 vmull_high_s32 (int32x4_t a
, int32x4_t b
)
12255 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
12258 : /* No clobbers */);
/* vmull_high_u8: UMULL2 -- unsigned widening multiply of the upper eight
   uint8 lanes of A and B, returning uint16x8_t.  (Body braces, asm operand
   lists and return are not visible in this excerpt.)  */
12262 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12263 vmull_high_u8 (uint8x16_t a
, uint8x16_t b
)
12266 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
12269 : /* No clobbers */);
/* vmull_high_u16: UMULL2 -- unsigned widening multiply of the upper four
   uint16 lanes of A and B, returning uint32x4_t.  (Body braces, asm operand
   lists and return are not visible in this excerpt.)  */
12273 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12274 vmull_high_u16 (uint16x8_t a
, uint16x8_t b
)
12277 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
12280 : /* No clobbers */);
/* vmull_high_u32: UMULL2 -- unsigned widening multiply of the upper two
   uint32 lanes of A and B, returning uint64x2_t.  (Body braces, asm operand
   lists and return are not visible in this excerpt.)  */
12284 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
12285 vmull_high_u32 (uint32x4_t a
, uint32x4_t b
)
12288 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
12291 : /* No clobbers */);
/* vmull_lane_s16 (a, b, c): SMULL by element -- signed widening multiply of
   the four int16 lanes of A by lane C of the int16x4_t B, giving int32x4_t.
   C is bound with "i" (compile-time constant).  (Output operand and macro
   tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12295 #define vmull_lane_s16(a, b, c) \
12298 int16x4_t b_ = (b); \
12299 int16x4_t a_ = (a); \
12300 int32x4_t result; \
12301 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
12303 : "w"(a_), "w"(b_), "i"(c) \
12304 : /* No clobbers */); \
/* vmull_lane_s32 (a, b, c): SMULL by element -- signed widening multiply of
   the two int32 lanes of A by lane C of the int32x2_t B, giving int64x2_t.
   C is bound with "i" (compile-time constant).  (Output operand and macro
   tail are not visible in this excerpt.)  */
12308 #define vmull_lane_s32(a, b, c) \
12311 int32x2_t b_ = (b); \
12312 int32x2_t a_ = (a); \
12313 int64x2_t result; \
12314 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
12316 : "w"(a_), "w"(b_), "i"(c) \
12317 : /* No clobbers */); \
/* vmull_lane_u16 (a, b, c): UMULL by element -- unsigned widening multiply of
   the four uint16 lanes of A by lane C of the uint16x4_t B, giving
   uint32x4_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12321 #define vmull_lane_u16(a, b, c) \
12324 uint16x4_t b_ = (b); \
12325 uint16x4_t a_ = (a); \
12326 uint32x4_t result; \
12327 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
12329 : "w"(a_), "w"(b_), "i"(c) \
12330 : /* No clobbers */); \
/* vmull_lane_u32 (a, b, c): UMULL by element -- unsigned widening multiply of
   the two uint32 lanes of A by lane C of the uint32x2_t B, giving uint64x2_t.
   C is bound with "i" (compile-time constant).  (Output operand and macro
   tail are not visible in this excerpt.)  */
12334 #define vmull_lane_u32(a, b, c) \
12337 uint32x2_t b_ = (b); \
12338 uint32x2_t a_ = (a); \
12339 uint64x2_t result; \
12340 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
12342 : "w"(a_), "w"(b_), "i"(c) \
12343 : /* No clobbers */); \
/* vmull_laneq_s16 (a, b, c): SMULL by element -- signed widening multiply of
   the four int16 lanes of A by lane C of the int16x8_t B, giving int32x4_t.
   C is bound with "i" (compile-time constant).  (Output operand and macro
   tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12347 #define vmull_laneq_s16(a, b, c) \
12350 int16x8_t b_ = (b); \
12351 int16x4_t a_ = (a); \
12352 int32x4_t result; \
12353 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
12355 : "w"(a_), "w"(b_), "i"(c) \
12356 : /* No clobbers */); \
/* vmull_laneq_s32 (a, b, c): SMULL by element -- signed widening multiply of
   the two int32 lanes of A by lane C of the int32x4_t B, giving int64x2_t.
   C is bound with "i" (compile-time constant).  (Output operand and macro
   tail are not visible in this excerpt.)  */
12360 #define vmull_laneq_s32(a, b, c) \
12363 int32x4_t b_ = (b); \
12364 int32x2_t a_ = (a); \
12365 int64x2_t result; \
12366 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
12368 : "w"(a_), "w"(b_), "i"(c) \
12369 : /* No clobbers */); \
/* vmull_laneq_u16 (a, b, c): UMULL by element -- unsigned widening multiply
   of the four uint16 lanes of A by lane C of the uint16x8_t B, giving
   uint32x4_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12373 #define vmull_laneq_u16(a, b, c) \
12376 uint16x8_t b_ = (b); \
12377 uint16x4_t a_ = (a); \
12378 uint32x4_t result; \
12379 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
12381 : "w"(a_), "w"(b_), "i"(c) \
12382 : /* No clobbers */); \
/* vmull_laneq_u32 (a, b, c): UMULL by element -- unsigned widening multiply
   of the two uint32 lanes of A by lane C of the uint32x4_t B, giving
   uint64x2_t.  C is bound with "i" (compile-time constant).  (Output operand
   and macro tail are not visible in this excerpt.)  */
12386 #define vmull_laneq_u32(a, b, c) \
12389 uint32x4_t b_ = (b); \
12390 uint32x2_t a_ = (a); \
12391 uint64x2_t result; \
12392 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
12394 : "w"(a_), "w"(b_), "i"(c) \
12395 : /* No clobbers */); \
/* vmull_n_s16: SMULL by element -- signed widening multiply of the four int16
   lanes of A by the scalar B (read as %2.h[0]), returning int32x4_t.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12399 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12400 vmull_n_s16 (int16x4_t a
, int16_t b
)
12403 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
12406 : /* No clobbers */);
/* vmull_n_s32: SMULL by element -- signed widening multiply of the two int32
   lanes of A by the scalar B (read as %2.s[0]), returning int64x2_t.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12410 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
12411 vmull_n_s32 (int32x2_t a
, int32_t b
)
12414 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
12417 : /* No clobbers */);
/* vmull_n_u16: UMULL by element -- unsigned widening multiply of the four
   uint16 lanes of A by the scalar B (read as %2.h[0]), returning uint32x4_t.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12421 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12422 vmull_n_u16 (uint16x4_t a
, uint16_t b
)
12425 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
12428 : /* No clobbers */);
/* vmull_n_u32: UMULL by element -- unsigned widening multiply of the two
   uint32 lanes of A by the scalar B (read as %2.s[0]), returning uint64x2_t.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12432 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
12433 vmull_n_u32 (uint32x2_t a
, uint32_t b
)
12436 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
12439 : /* No clobbers */);
/* vmull_p8: PMULL -- polynomial widening multiply of the eight 8-bit lanes of
   A and B, returning poly16x8_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12443 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
12444 vmull_p8 (poly8x8_t a
, poly8x8_t b
)
12447 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
12450 : /* No clobbers */);
/* vmull_s8: SMULL -- signed widening multiply of the eight int8 lanes of A
   and B, returning int16x8_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
12454 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12455 vmull_s8 (int8x8_t a
, int8x8_t b
)
12458 __asm__ ("smull %0.8h, %1.8b, %2.8b"
12461 : /* No clobbers */);
/* vmull_s16: SMULL -- signed widening multiply of the four int16 lanes of A
   and B, returning int32x4_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
12465 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12466 vmull_s16 (int16x4_t a
, int16x4_t b
)
12469 __asm__ ("smull %0.4s, %1.4h, %2.4h"
12472 : /* No clobbers */);
/* vmull_s32: SMULL -- signed widening multiply of the two int32 lanes of A
   and B, returning int64x2_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
12476 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
12477 vmull_s32 (int32x2_t a
, int32x2_t b
)
12480 __asm__ ("smull %0.2d, %1.2s, %2.2s"
12483 : /* No clobbers */);
/* vmull_u8: UMULL -- unsigned widening multiply of the eight uint8 lanes of A
   and B, returning uint16x8_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12487 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12488 vmull_u8 (uint8x8_t a
, uint8x8_t b
)
12491 __asm__ ("umull %0.8h, %1.8b, %2.8b"
12494 : /* No clobbers */);
/* vmull_u16: UMULL -- unsigned widening multiply of the four uint16 lanes of
   A and B, returning uint32x4_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12498 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12499 vmull_u16 (uint16x4_t a
, uint16x4_t b
)
12502 __asm__ ("umull %0.4s, %1.4h, %2.4h"
12505 : /* No clobbers */);
/* vmull_u32: UMULL -- unsigned widening multiply of the two uint32 lanes of A
   and B, returning uint64x2_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12509 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
12510 vmull_u32 (uint32x2_t a
, uint32x2_t b
)
12513 __asm__ ("umull %0.2d, %1.2s, %2.2s"
12516 : /* No clobbers */);
/* vmulq_lane_f32 (a, b, c): FMUL by element -- each of the four float32 lanes
   of A times lane C of the float32x2_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this
   excerpt.)  */
12520 #define vmulq_lane_f32(a, b, c) \
12523 float32x2_t b_ = (b); \
12524 float32x4_t a_ = (a); \
12525 float32x4_t result; \
12526 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
12528 : "w"(a_), "w"(b_), "i"(c) \
12529 : /* No clobbers */); \
/* vmulq_lane_f64 (a, b, c): FMUL by element -- both float64 lanes of A times
   element C of the register holding the float64x1_t B.  C is bound with "i"
   (compile-time constant).  (Output operand and macro tail are not visible in
   this excerpt.)  */
12533 #define vmulq_lane_f64(a, b, c) \
12536 float64x1_t b_ = (b); \
12537 float64x2_t a_ = (a); \
12538 float64x2_t result; \
12539 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
12541 : "w"(a_), "w"(b_), "i"(c) \
12542 : /* No clobbers */); \
/* vmulq_lane_s16 (a, b, c): MUL by element -- each of the eight int16 lanes
   of A times lane C of the int16x4_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12546 #define vmulq_lane_s16(a, b, c) \
12549 int16x4_t b_ = (b); \
12550 int16x8_t a_ = (a); \
12551 int16x8_t result; \
12552 __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
12554 : "w"(a_), "w"(b_), "i"(c) \
12555 : /* No clobbers */); \
/* vmulq_lane_s32 (a, b, c): MUL by element -- each of the four int32 lanes of
   A times lane C of the int32x2_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this
   excerpt.)  */
12559 #define vmulq_lane_s32(a, b, c) \
12562 int32x2_t b_ = (b); \
12563 int32x4_t a_ = (a); \
12564 int32x4_t result; \
12565 __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \
12567 : "w"(a_), "w"(b_), "i"(c) \
12568 : /* No clobbers */); \
/* vmulq_lane_u16 (a, b, c): MUL by element -- each of the eight uint16 lanes
   of A times lane C of the uint16x4_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12572 #define vmulq_lane_u16(a, b, c) \
12575 uint16x4_t b_ = (b); \
12576 uint16x8_t a_ = (a); \
12577 uint16x8_t result; \
12578 __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
12580 : "w"(a_), "w"(b_), "i"(c) \
12581 : /* No clobbers */); \
/* vmulq_lane_u32 (a, b, c): MUL by element -- each of the four uint32 lanes
   of A times lane C of the uint32x2_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this
   excerpt.)  */
12585 #define vmulq_lane_u32(a, b, c) \
12588 uint32x2_t b_ = (b); \
12589 uint32x4_t a_ = (a); \
12590 uint32x4_t result; \
12591 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12593 : "w"(a_), "w"(b_), "i"(c) \
12594 : /* No clobbers */); \
/* vmulq_laneq_f32 (a, b, c): FMUL by element -- each of the four float32
   lanes of A times lane C of the float32x4_t B.  C is bound with "i"
   (compile-time constant).  (Output operand and macro tail are not visible in
   this excerpt.)  */
12598 #define vmulq_laneq_f32(a, b, c) \
12601 float32x4_t b_ = (b); \
12602 float32x4_t a_ = (a); \
12603 float32x4_t result; \
12604 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
12606 : "w"(a_), "w"(b_), "i"(c) \
12607 : /* No clobbers */); \
/* vmulq_laneq_f64 (a, b, c): FMUL by element -- both float64 lanes of A times
   lane C of the float64x2_t B.  C is bound with "i" (compile-time constant).
   (Output operand and macro tail are not visible in this excerpt.)  */
12611 #define vmulq_laneq_f64(a, b, c) \
12614 float64x2_t b_ = (b); \
12615 float64x2_t a_ = (a); \
12616 float64x2_t result; \
12617 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
12619 : "w"(a_), "w"(b_), "i"(c) \
12620 : /* No clobbers */); \
/* vmulq_laneq_s16 (a, b, c): MUL by element -- each of the eight int16 lanes
   of A times lane C of the int16x8_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12624 #define vmulq_laneq_s16(a, b, c) \
12627 int16x8_t b_ = (b); \
12628 int16x8_t a_ = (a); \
12629 int16x8_t result; \
12630 __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
12632 : "w"(a_), "w"(b_), "i"(c) \
12633 : /* No clobbers */); \
/* vmulq_laneq_s32 (a, b, c): MUL by element -- each of the four int32 lanes
   of A times lane C of the int32x4_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this
   excerpt.)  */
12637 #define vmulq_laneq_s32(a, b, c) \
12640 int32x4_t b_ = (b); \
12641 int32x4_t a_ = (a); \
12642 int32x4_t result; \
12643 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12645 : "w"(a_), "w"(b_), "i"(c) \
12646 : /* No clobbers */); \
/* vmulq_laneq_u16 (a, b, c): MUL by element -- each of the eight uint16 lanes
   of A times lane C of the uint16x8_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is bound with "w", but the .h[lane] form can only encode
   V0-V15 -- check whether the "x" constraint is required.  */
12650 #define vmulq_laneq_u16(a, b, c) \
12653 uint16x8_t b_ = (b); \
12654 uint16x8_t a_ = (a); \
12655 uint16x8_t result; \
12656 __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
12658 : "w"(a_), "w"(b_), "i"(c) \
12659 : /* No clobbers */); \
/* vmulq_laneq_u32 (a, b, c): MUL by element -- each of the four uint32 lanes
   of A times lane C of the uint32x4_t B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this
   excerpt.)  */
12663 #define vmulq_laneq_u32(a, b, c) \
12666 uint32x4_t b_ = (b); \
12667 uint32x4_t a_ = (a); \
12668 uint32x4_t result; \
12669 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12671 : "w"(a_), "w"(b_), "i"(c) \
12672 : /* No clobbers */); \
/* vmulq_n_f32: FMUL by element -- all four float32 lanes of A times the
   scalar B, read by the template as lane 0 of its register (%2.s[0]).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12676 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
12677 vmulq_n_f32 (float32x4_t a
, float32_t b
)
12679 float32x4_t result
;
12680 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
12683 : /* No clobbers */);
/* vmulq_n_f64: FMUL by element -- both float64 lanes of A times the scalar B,
   read by the template as lane 0 of its register (%2.d[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12687 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
12688 vmulq_n_f64 (float64x2_t a
, float64_t b
)
12690 float64x2_t result
;
12691 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
12694 : /* No clobbers */);
/* vmulq_n_s16: MUL by element -- all eight int16 lanes of A times the scalar
   B, read by the template as lane 0 of its register (%2.h[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12698 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12699 vmulq_n_s16 (int16x8_t a
, int16_t b
)
12702 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12705 : /* No clobbers */);
/* vmulq_n_s32: MUL by element -- all four int32 lanes of A times the scalar
   B, read by the template as lane 0 of its register (%2.s[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12709 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12710 vmulq_n_s32 (int32x4_t a
, int32_t b
)
12713 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12716 : /* No clobbers */);
/* vmulq_n_u16: MUL by element -- all eight uint16 lanes of A times the scalar
   B, read by the template as lane 0 of its register (%2.h[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12720 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12721 vmulq_n_u16 (uint16x8_t a
, uint16_t b
)
12724 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12727 : /* No clobbers */);
/* vmulq_n_u32: MUL by element -- all four uint32 lanes of A times the scalar
   B, read by the template as lane 0 of its register (%2.s[0]).  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
12731 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12732 vmulq_n_u32 (uint32x4_t a
, uint32_t b
)
12735 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12738 : /* No clobbers */);
/* vmuls_lane_f32 (a, b, c): scalar FMUL by element -- the float A times lane
   C of B.  C is bound with "i" (compile-time constant).  (Output operand and
   macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the 128-bit float32x4_t although the name is _lane, not
   _laneq -- confirm the intended argument type against ACLE.  */
12742 #define vmuls_lane_f32(a, b, c) \
12745 float32x4_t b_ = (b); \
12746 float32_t a_ = (a); \
12747 float32_t result; \
12748 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
12750 : "w"(a_), "w"(b_), "i"(c) \
12751 : /* No clobbers */); \
/* vmulx_f32: FMULX (floating-point multiply extended), lane by lane, on the
   two float32 lanes of A and B.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12755 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
12756 vmulx_f32 (float32x2_t a
, float32x2_t b
)
12758 float32x2_t result
;
12759 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
12762 : /* No clobbers */);
/* vmulx_lane_f32 (a, b, c): FMULX by element -- each of the two float32 lanes
   of A multiplied (extended) by lane C of B.  C is bound with "i"
   (compile-time constant).  (Output operand and macro tail are not visible in
   this excerpt.)
   NOTE(review): b_ is the 128-bit float32x4_t although the name is _lane, not
   _laneq -- confirm the intended argument type against ACLE.  */
12766 #define vmulx_lane_f32(a, b, c) \
12769 float32x4_t b_ = (b); \
12770 float32x2_t a_ = (a); \
12771 float32x2_t result; \
12772 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
12774 : "w"(a_), "w"(b_), "i"(c) \
12775 : /* No clobbers */); \
/* vmulxd_f64: scalar FMULX (floating-point multiply extended) of the doubles
   A and B.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
12779 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
12780 vmulxd_f64 (float64_t a
, float64_t b
)
12783 __asm__ ("fmulx %d0, %d1, %d2"
12786 : /* No clobbers */);
/* vmulxq_f32: FMULX (floating-point multiply extended), lane by lane, on the
   four float32 lanes of A and B.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12790 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
12791 vmulxq_f32 (float32x4_t a
, float32x4_t b
)
12793 float32x4_t result
;
12794 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
12797 : /* No clobbers */);
/* vmulxq_f64: FMULX (floating-point multiply extended), lane by lane, on the
   two float64 lanes of A and B.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
12801 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
12802 vmulxq_f64 (float64x2_t a
, float64x2_t b
)
12804 float64x2_t result
;
12805 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
12808 : /* No clobbers */);
/* vmulxq_lane_f32 (a, b, c): FMULX by element -- each of the four float32
   lanes of A multiplied (extended) by lane C of B.  C is bound with "i"
   (compile-time constant).  (Output operand and macro tail are not visible in
   this excerpt.)
   NOTE(review): b_ is the 128-bit float32x4_t although the name is _lane, not
   _laneq (vmulq_lane_f32 takes float32x2_t) -- confirm against ACLE.  */
12812 #define vmulxq_lane_f32(a, b, c) \
12815 float32x4_t b_ = (b); \
12816 float32x4_t a_ = (a); \
12817 float32x4_t result; \
12818 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
12820 : "w"(a_), "w"(b_), "i"(c) \
12821 : /* No clobbers */); \
/* vmulxq_lane_f64 (a, b, c): FMULX by element -- both float64 lanes of A
   multiplied (extended) by lane C of B.  C is bound with "i" (compile-time
   constant).  (Output operand and macro tail are not visible in this excerpt.)
   NOTE(review): b_ is the two-lane float64x2_t although the name is _lane,
   not _laneq (vmulq_lane_f64 takes float64x1_t) -- confirm against ACLE.  */
12825 #define vmulxq_lane_f64(a, b, c) \
12828 float64x2_t b_ = (b); \
12829 float64x2_t a_ = (a); \
12830 float64x2_t result; \
12831 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
12833 : "w"(a_), "w"(b_), "i"(c) \
12834 : /* No clobbers */); \
/* vmulxs_f32: scalar FMULX (floating-point multiply extended) of the floats A
   and B.  (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12838 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
12839 vmulxs_f32 (float32_t a
, float32_t b
)
12842 __asm__ ("fmulx %s0, %s1, %s2"
12845 : /* No clobbers */);
/* vmvn_p8: MVN -- bitwise NOT of the 64-bit vector A (poly8x8_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12849 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
12850 vmvn_p8 (poly8x8_t a
)
12853 __asm__ ("mvn %0.8b,%1.8b"
12856 : /* No clobbers */);
/* vmvn_s8: MVN -- bitwise NOT of the 64-bit vector A (int8x8_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12860 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
12861 vmvn_s8 (int8x8_t a
)
12864 __asm__ ("mvn %0.8b,%1.8b"
12867 : /* No clobbers */);
/* vmvn_s16: MVN -- bitwise NOT of the 64-bit vector A (int16x4_t); the
   template uses the byte arrangement (.8b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12871 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
12872 vmvn_s16 (int16x4_t a
)
12875 __asm__ ("mvn %0.8b,%1.8b"
12878 : /* No clobbers */);
/* vmvn_s32: MVN -- bitwise NOT of the 64-bit vector A (int32x2_t); the
   template uses the byte arrangement (.8b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12882 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
12883 vmvn_s32 (int32x2_t a
)
12886 __asm__ ("mvn %0.8b,%1.8b"
12889 : /* No clobbers */);
/* vmvn_u8: MVN -- bitwise NOT of the 64-bit vector A (uint8x8_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12893 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
12894 vmvn_u8 (uint8x8_t a
)
12897 __asm__ ("mvn %0.8b,%1.8b"
12900 : /* No clobbers */);
/* vmvn_u16: MVN -- bitwise NOT of the 64-bit vector A (uint16x4_t); the
   template uses the byte arrangement (.8b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12904 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
12905 vmvn_u16 (uint16x4_t a
)
12908 __asm__ ("mvn %0.8b,%1.8b"
12911 : /* No clobbers */);
/* vmvn_u32: MVN -- bitwise NOT of the 64-bit vector A (uint32x2_t); the
   template uses the byte arrangement (.8b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12915 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
12916 vmvn_u32 (uint32x2_t a
)
12919 __asm__ ("mvn %0.8b,%1.8b"
12922 : /* No clobbers */);
/* vmvnq_p8: MVN -- bitwise NOT of the 128-bit vector A (poly8x16_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12926 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
12927 vmvnq_p8 (poly8x16_t a
)
12930 __asm__ ("mvn %0.16b,%1.16b"
12933 : /* No clobbers */);
/* vmvnq_s8: MVN -- bitwise NOT of the 128-bit vector A (int8x16_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12937 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
12938 vmvnq_s8 (int8x16_t a
)
12941 __asm__ ("mvn %0.16b,%1.16b"
12944 : /* No clobbers */);
/* vmvnq_s16: MVN -- bitwise NOT of the 128-bit vector A (int16x8_t); the
   template uses the byte arrangement (.16b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12948 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
12949 vmvnq_s16 (int16x8_t a
)
12952 __asm__ ("mvn %0.16b,%1.16b"
12955 : /* No clobbers */);
/* vmvnq_s32: MVN -- bitwise NOT of the 128-bit vector A (int32x4_t); the
   template uses the byte arrangement (.16b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12959 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
12960 vmvnq_s32 (int32x4_t a
)
12963 __asm__ ("mvn %0.16b,%1.16b"
12966 : /* No clobbers */);
/* vmvnq_u8: MVN -- bitwise NOT of the 128-bit vector A (uint8x16_t).  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
12970 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
12971 vmvnq_u8 (uint8x16_t a
)
12974 __asm__ ("mvn %0.16b,%1.16b"
12977 : /* No clobbers */);
/* vmvnq_u16: MVN -- bitwise NOT of the 128-bit vector A (uint16x8_t); the
   template uses the byte arrangement (.16b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12981 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
12982 vmvnq_u16 (uint16x8_t a
)
12985 __asm__ ("mvn %0.16b,%1.16b"
12988 : /* No clobbers */);
/* vmvnq_u32: MVN -- bitwise NOT of the 128-bit vector A (uint32x4_t); the
   template uses the byte arrangement (.16b), which covers the same bits.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
12992 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
12993 vmvnq_u32 (uint32x4_t a
)
12996 __asm__ ("mvn %0.16b,%1.16b"
12999 : /* No clobbers */);
/* vneg_f32: FNEG -- negates each of the two float32 lanes of A.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13003 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13004 vneg_f32 (float32x2_t a
)
13006 float32x2_t result
;
13007 __asm__ ("fneg %0.2s,%1.2s"
13010 : /* No clobbers */);
/* vneg_s8: NEG -- negates each of the eight int8 lanes of A.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13014 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
13015 vneg_s8 (int8x8_t a
)
13018 __asm__ ("neg %0.8b,%1.8b"
13021 : /* No clobbers */);
/* vneg_s16: NEG -- negates each of the four int16 lanes of A.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13025 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13026 vneg_s16 (int16x4_t a
)
13029 __asm__ ("neg %0.4h,%1.4h"
13032 : /* No clobbers */);
/* vneg_s32: NEG -- negates each of the two int32 lanes of A.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13036 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13037 vneg_s32 (int32x2_t a
)
13040 __asm__ ("neg %0.2s,%1.2s"
13043 : /* No clobbers */);
/* vnegq_f32: FNEG -- negates each of the four float32 lanes of A.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13047 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13048 vnegq_f32 (float32x4_t a
)
13050 float32x4_t result
;
13051 __asm__ ("fneg %0.4s,%1.4s"
13054 : /* No clobbers */);
/* vnegq_f64: FNEG -- negates each of the two float64 lanes of A.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13058 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13059 vnegq_f64 (float64x2_t a
)
13061 float64x2_t result
;
13062 __asm__ ("fneg %0.2d,%1.2d"
13065 : /* No clobbers */);
/* vnegq_s8: NEG -- negates each of the sixteen int8 lanes of A.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13069 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13070 vnegq_s8 (int8x16_t a
)
13073 __asm__ ("neg %0.16b,%1.16b"
13076 : /* No clobbers */);
/* vnegq_s16: NEG -- negates each of the eight int16 lanes of A.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13080 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13081 vnegq_s16 (int16x8_t a
)
13084 __asm__ ("neg %0.8h,%1.8h"
13087 : /* No clobbers */);
/* vnegq_s32: NEG -- negates each of the four int32 lanes of A.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13091 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13092 vnegq_s32 (int32x4_t a
)
13095 __asm__ ("neg %0.4s,%1.4s"
13098 : /* No clobbers */);
/* vnegq_s64: NEG -- negates each of the two int64 lanes of A.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13102 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
13103 vnegq_s64 (int64x2_t a
)
13106 __asm__ ("neg %0.2d,%1.2d"
13109 : /* No clobbers */);
/* vpadal_s8: SADALP -- adjacent pairs of the int8 lanes of B are summed as
   16-bit values and accumulated into the destination.  The template names
   only %0 and %2, so the accumulator A is presumably tied to %0 -- confirm.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13113 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13114 vpadal_s8 (int16x4_t a
, int8x8_t b
)
13117 __asm__ ("sadalp %0.4h,%2.8b"
13120 : /* No clobbers */);
/* vpadal_s16: SADALP -- adjacent pairs of the int16 lanes of B are summed as
   32-bit values and accumulated into the destination.  The template names
   only %0 and %2, so the accumulator A is presumably tied to %0 -- confirm.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13124 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13125 vpadal_s16 (int32x2_t a
, int16x4_t b
)
13128 __asm__ ("sadalp %0.2s,%2.4h"
13131 : /* No clobbers */);
/* vpadal_s32: SADALP -- the two int32 lanes of B are summed as a 64-bit value
   and accumulated into the destination.  The template names only %0 and %2,
   so the accumulator A is presumably tied to %0 -- confirm.  (Body braces,
   asm operand lists and return are not visible in this excerpt.)  */
13135 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
13136 vpadal_s32 (int64x1_t a
, int32x2_t b
)
13139 __asm__ ("sadalp %0.1d,%2.2s"
13142 : /* No clobbers */);
/* vpadal_u8: UADALP -- adjacent pairs of the uint8 lanes of B are summed as
   16-bit values and accumulated into the destination.  The template names
   only %0 and %2, so the accumulator A is presumably tied to %0 -- confirm.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13146 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13147 vpadal_u8 (uint16x4_t a
, uint8x8_t b
)
13150 __asm__ ("uadalp %0.4h,%2.8b"
13153 : /* No clobbers */);
/* vpadal_u16: UADALP -- adjacent pairs of the uint16 lanes of B are summed as
   32-bit values and accumulated into the destination.  The template names
   only %0 and %2, so the accumulator A is presumably tied to %0 -- confirm.
   (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13157 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13158 vpadal_u16 (uint32x2_t a
, uint16x4_t b
)
13161 __asm__ ("uadalp %0.2s,%2.4h"
13164 : /* No clobbers */);
/* vpadal_u32: UADALP -- the two uint32 lanes of B are summed as a 64-bit
   value and accumulated into the destination.  The template names only %0 and
   %2, so the accumulator A is presumably tied to %0 -- confirm.  (Body
   braces, asm operand lists and return are not visible in this excerpt.)  */
13168 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
13169 vpadal_u32 (uint64x1_t a
, uint32x2_t b
)
13172 __asm__ ("uadalp %0.1d,%2.2s"
13175 : /* No clobbers */);
/* vpadalq_s8: SADALP -- adjacent pairs of the sixteen int8 lanes of B are
   summed as 16-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13179 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13180 vpadalq_s8 (int16x8_t a
, int8x16_t b
)
13183 __asm__ ("sadalp %0.8h,%2.16b"
13186 : /* No clobbers */);
/* vpadalq_s16: SADALP -- adjacent pairs of the eight int16 lanes of B are
   summed as 32-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13190 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13191 vpadalq_s16 (int32x4_t a
, int16x8_t b
)
13194 __asm__ ("sadalp %0.4s,%2.8h"
13197 : /* No clobbers */);
/* vpadalq_s32: SADALP -- adjacent pairs of the four int32 lanes of B are
   summed as 64-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13201 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
13202 vpadalq_s32 (int64x2_t a
, int32x4_t b
)
13205 __asm__ ("sadalp %0.2d,%2.4s"
13208 : /* No clobbers */);
/* vpadalq_u8: UADALP -- adjacent pairs of the sixteen uint8 lanes of B are
   summed as 16-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13212 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13213 vpadalq_u8 (uint16x8_t a
, uint8x16_t b
)
13216 __asm__ ("uadalp %0.8h,%2.16b"
13219 : /* No clobbers */);
/* vpadalq_u16: UADALP -- adjacent pairs of the eight uint16 lanes of B are
   summed as 32-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13223 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13224 vpadalq_u16 (uint32x4_t a
, uint16x8_t b
)
13227 __asm__ ("uadalp %0.4s,%2.8h"
13230 : /* No clobbers */);
/* vpadalq_u32: UADALP -- adjacent pairs of the four uint32 lanes of B are
   summed as 64-bit values and accumulated into the destination.  The template
   names only %0 and %2, so the accumulator A is presumably tied to %0 --
   confirm.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13234 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
13235 vpadalq_u32 (uint64x2_t a
, uint32x4_t b
)
13238 __asm__ ("uadalp %0.2d,%2.4s"
13241 : /* No clobbers */);
/* vpadd_f32: FADDP -- pairwise add of adjacent float32 lanes taken from A and
   B, returning float32x2_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
13245 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13246 vpadd_f32 (float32x2_t a
, float32x2_t b
)
13248 float32x2_t result
;
13249 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
13252 : /* No clobbers */);
/* vpadd_s8: returns __builtin_aarch64_addpv8qi (__a, __b); judging by the
   builtin's name this is presumably the pairwise add (ADDP) of the two
   int8x8_t vectors -- confirm.  (Body braces are not visible in this
   excerpt.)  */
13256 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
13257 vpadd_s8 (int8x8_t __a
, int8x8_t __b
)
13259 return __builtin_aarch64_addpv8qi (__a
, __b
);
/* vpadd_s16: returns __builtin_aarch64_addpv4hi (__a, __b); judging by the
   builtin's name this is presumably the pairwise add (ADDP) of the two
   int16x4_t vectors -- confirm.  (Body braces are not visible in this
   excerpt.)  */
13262 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13263 vpadd_s16 (int16x4_t __a
, int16x4_t __b
)
13265 return __builtin_aarch64_addpv4hi (__a
, __b
);
/* vpadd_s32: returns __builtin_aarch64_addpv2si (__a, __b); judging by the
   builtin's name this is presumably the pairwise add (ADDP) of the two
   int32x2_t vectors -- confirm.  (Body braces are not visible in this
   excerpt.)  */
13268 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13269 vpadd_s32 (int32x2_t __a
, int32x2_t __b
)
13271 return __builtin_aarch64_addpv2si (__a
, __b
);
/* vpadd_u8: unsigned wrapper around the signed builtin
   __builtin_aarch64_addpv8qi -- __a is cast to int8x8_t for the call and the
   result is cast back to uint8x8_t.  (The second call argument, the closing
   of the call and the body braces are not visible in this excerpt.)  */
13274 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
13275 vpadd_u8 (uint8x8_t __a
, uint8x8_t __b
)
13277 return (uint8x8_t
) __builtin_aarch64_addpv8qi ((int8x8_t
) __a
,
/* vpadd_u16: unsigned wrapper around the signed builtin
   __builtin_aarch64_addpv4hi -- __a is cast to int16x4_t for the call and the
   result is cast back to uint16x4_t.  (The second call argument, the closing
   of the call and the body braces are not visible in this excerpt.)  */
13281 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13282 vpadd_u16 (uint16x4_t __a
, uint16x4_t __b
)
13284 return (uint16x4_t
) __builtin_aarch64_addpv4hi ((int16x4_t
) __a
,
/* vpadd_u32: unsigned wrapper around the signed builtin
   __builtin_aarch64_addpv2si -- __a is cast to int32x2_t for the call and the
   result is cast back to uint32x2_t.  (The second call argument, the closing
   of the call and the body braces are not visible in this excerpt.)  */
13288 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13289 vpadd_u32 (uint32x2_t __a
, uint32x2_t __b
)
13291 return (uint32x2_t
) __builtin_aarch64_addpv2si ((int32x2_t
) __a
,
/* vpaddd_f64: scalar FADDP -- adds the two float64 lanes of A into a single
   double.  (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13295 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13296 vpaddd_f64 (float64x2_t a
)
13299 __asm__ ("faddp %d0,%1.2d"
13302 : /* No clobbers */);
/* vpaddl_s8: SADDLP -- signed add of adjacent pairs of the eight int8 lanes
   of A, widened to four int16 lanes.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13306 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13307 vpaddl_s8 (int8x8_t a
)
13310 __asm__ ("saddlp %0.4h,%1.8b"
13313 : /* No clobbers */);
/* vpaddl_s16: SADDLP -- signed add of adjacent pairs of the four int16 lanes
   of A, widened to two int32 lanes.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13317 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13318 vpaddl_s16 (int16x4_t a
)
13321 __asm__ ("saddlp %0.2s,%1.4h"
13324 : /* No clobbers */);
/* vpaddl_s32: SADDLP -- signed add of the two int32 lanes of A, widened to a
   single 64-bit value.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13328 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
13329 vpaddl_s32 (int32x2_t a
)
13332 __asm__ ("saddlp %0.1d,%1.2s"
13335 : /* No clobbers */);
/* vpaddl_u8: UADDLP -- unsigned add of adjacent pairs of the eight uint8
   lanes of A, widened to four uint16 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13339 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13340 vpaddl_u8 (uint8x8_t a
)
13343 __asm__ ("uaddlp %0.4h,%1.8b"
13346 : /* No clobbers */);
/* vpaddl_u16: UADDLP -- unsigned add of adjacent pairs of the four uint16
   lanes of A, widened to two uint32 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13350 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13351 vpaddl_u16 (uint16x4_t a
)
13354 __asm__ ("uaddlp %0.2s,%1.4h"
13357 : /* No clobbers */);
/* vpaddl_u32: UADDLP -- unsigned add of the two uint32 lanes of A, widened to
   a single 64-bit value.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13361 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
13362 vpaddl_u32 (uint32x2_t a
)
13365 __asm__ ("uaddlp %0.1d,%1.2s"
13368 : /* No clobbers */);
/* vpaddlq_s8: SADDLP -- signed add of adjacent pairs of the sixteen int8
   lanes of A, widened to eight int16 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13372 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13373 vpaddlq_s8 (int8x16_t a
)
13376 __asm__ ("saddlp %0.8h,%1.16b"
13379 : /* No clobbers */);
/* vpaddlq_s16: SADDLP -- signed add of adjacent pairs of the eight int16
   lanes of A, widened to four int32 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13383 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13384 vpaddlq_s16 (int16x8_t a
)
13387 __asm__ ("saddlp %0.4s,%1.8h"
13390 : /* No clobbers */);
/* vpaddlq_s32: SADDLP -- signed add of adjacent pairs of the four int32 lanes
   of A, widened to two int64 lanes.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13394 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
13395 vpaddlq_s32 (int32x4_t a
)
13398 __asm__ ("saddlp %0.2d,%1.4s"
13401 : /* No clobbers */);
/* vpaddlq_u8: UADDLP -- unsigned add of adjacent pairs of the sixteen uint8
   lanes of A, widened to eight uint16 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13405 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13406 vpaddlq_u8 (uint8x16_t a
)
13409 __asm__ ("uaddlp %0.8h,%1.16b"
13412 : /* No clobbers */);
/* vpaddlq_u16: UADDLP -- unsigned add of adjacent pairs of the eight uint16
   lanes of A, widened to four uint32 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13416 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13417 vpaddlq_u16 (uint16x8_t a
)
13420 __asm__ ("uaddlp %0.4s,%1.8h"
13423 : /* No clobbers */);
/* vpaddlq_u32: UADDLP -- unsigned add of adjacent pairs of the four uint32
   lanes of A, widened to two uint64 lanes.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13427 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
13428 vpaddlq_u32 (uint32x4_t a
)
13431 __asm__ ("uaddlp %0.2d,%1.4s"
13434 : /* No clobbers */);
/* vpaddq_f32: FADDP -- pairwise add of adjacent float32 lanes taken from A
   and B, returning float32x4_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
13438 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13439 vpaddq_f32 (float32x4_t a
, float32x4_t b
)
13441 float32x4_t result
;
13442 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
13445 : /* No clobbers */);
/* vpaddq_f64: FADDP -- pairwise add of adjacent float64 lanes taken from A
   and B, returning float64x2_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
13449 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13450 vpaddq_f64 (float64x2_t a
, float64x2_t b
)
13452 float64x2_t result
;
13453 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
13456 : /* No clobbers */);
/* vpaddq_s8: ADDP -- pairwise add of adjacent int8 lanes taken from A and B,
   returning int8x16_t.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13460 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13461 vpaddq_s8 (int8x16_t a
, int8x16_t b
)
13464 __asm__ ("addp %0.16b,%1.16b,%2.16b"
13467 : /* No clobbers */);
/* vpaddq_s16: ADDP -- pairwise add of adjacent int16 lanes taken from A and
   B, returning int16x8_t.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13471 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13472 vpaddq_s16 (int16x8_t a
, int16x8_t b
)
13475 __asm__ ("addp %0.8h,%1.8h,%2.8h"
13478 : /* No clobbers */);
/* vpaddq_s32: ADDP -- pairwise add of adjacent int32 lanes taken from A and
   B, returning int32x4_t.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13482 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13483 vpaddq_s32 (int32x4_t a
, int32x4_t b
)
13486 __asm__ ("addp %0.4s,%1.4s,%2.4s"
13489 : /* No clobbers */);
/* vpaddq_s64: ADDP -- pairwise add of adjacent int64 lanes taken from A and
   B, returning int64x2_t.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13493 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
13494 vpaddq_s64 (int64x2_t a
, int64x2_t b
)
13497 __asm__ ("addp %0.2d,%1.2d,%2.2d"
13500 : /* No clobbers */);
/* vpaddq_u8: ADDP -- pairwise add of adjacent uint8 lanes taken from A and B,
   returning uint8x16_t.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13504 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
13505 vpaddq_u8 (uint8x16_t a
, uint8x16_t b
)
13508 __asm__ ("addp %0.16b,%1.16b,%2.16b"
13511 : /* No clobbers */);
/* vpaddq_u16: ADDP -- pairwise add of adjacent uint16 lanes taken from A and
   B, returning uint16x8_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
13515 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13516 vpaddq_u16 (uint16x8_t a
, uint16x8_t b
)
13519 __asm__ ("addp %0.8h,%1.8h,%2.8h"
13522 : /* No clobbers */);
/* vpaddq_u32: ADDP -- pairwise add of adjacent uint32 lanes taken from A and
   B, returning uint32x4_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
13526 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13527 vpaddq_u32 (uint32x4_t a
, uint32x4_t b
)
13530 __asm__ ("addp %0.4s,%1.4s,%2.4s"
13533 : /* No clobbers */);
/* vpaddq_u64: ADDP -- pairwise add of adjacent uint64 lanes taken from A and
   B, returning uint64x2_t.  (Body braces, asm operand lists and return are
   not visible in this excerpt.)  */
13537 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
13538 vpaddq_u64 (uint64x2_t a
, uint64x2_t b
)
13541 __asm__ ("addp %0.2d,%1.2d,%2.2d"
13544 : /* No clobbers */);
/* vpadds_f32: scalar FADDP -- adds the two float32 lanes of A into a single
   float.  (Body braces, asm operand lists and return are not visible in this
   excerpt.)  */
13548 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13549 vpadds_f32 (float32x2_t a
)
13552 __asm__ ("faddp %s0,%1.2s"
13555 : /* No clobbers */);
/* vpmax_f32: FMAXP -- pairwise maximum of adjacent float32 lanes taken from A
   and B, returning float32x2_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
13559 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13560 vpmax_f32 (float32x2_t a
, float32x2_t b
)
13562 float32x2_t result
;
13563 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
13566 : /* No clobbers */);
/* vpmax_s8: SMAXP -- signed pairwise maximum of adjacent int8 lanes taken
   from A and B, returning int8x8_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13570 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
13571 vpmax_s8 (int8x8_t a
, int8x8_t b
)
13574 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
13577 : /* No clobbers */);
/* vpmax_s16: SMAXP -- signed pairwise maximum of adjacent int16 lanes taken
   from A and B, returning int16x4_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13581 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13582 vpmax_s16 (int16x4_t a
, int16x4_t b
)
13585 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
13588 : /* No clobbers */);
/* vpmax_s32: SMAXP -- signed pairwise maximum of adjacent int32 lanes taken
   from A and B, returning int32x2_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13592 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13593 vpmax_s32 (int32x2_t a
, int32x2_t b
)
13596 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
13599 : /* No clobbers */);
/* vpmax_u8: UMAXP -- unsigned pairwise maximum of adjacent uint8 lanes taken
   from A and B, returning uint8x8_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13603 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
13604 vpmax_u8 (uint8x8_t a
, uint8x8_t b
)
13607 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
13610 : /* No clobbers */);
/* vpmax_u16: UMAXP -- unsigned pairwise maximum of adjacent uint16 lanes
   taken from A and B, returning uint16x4_t.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13614 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13615 vpmax_u16 (uint16x4_t a
, uint16x4_t b
)
13618 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
13621 : /* No clobbers */);
/* vpmax_u32: UMAXP -- unsigned pairwise maximum of adjacent uint32 lanes
   taken from A and B, returning uint32x2_t.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13625 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13626 vpmax_u32 (uint32x2_t a
, uint32x2_t b
)
13629 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
13632 : /* No clobbers */);
/* vpmaxnm_f32: FMAXNMP -- pairwise maxNum of adjacent float32 lanes taken
   from A and B, returning float32x2_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13636 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13637 vpmaxnm_f32 (float32x2_t a
, float32x2_t b
)
13639 float32x2_t result
;
13640 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
13643 : /* No clobbers */);
/* vpmaxnmq_f32: FMAXNMP -- pairwise maxNum of adjacent float32 lanes taken
   from A and B, returning float32x4_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13647 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13648 vpmaxnmq_f32 (float32x4_t a
, float32x4_t b
)
13650 float32x4_t result
;
13651 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
13654 : /* No clobbers */);
/* vpmaxnmq_f64: FMAXNMP -- pairwise maxNum of adjacent float64 lanes taken
   from A and B, returning float64x2_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13658 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13659 vpmaxnmq_f64 (float64x2_t a
, float64x2_t b
)
13661 float64x2_t result
;
13662 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
13665 : /* No clobbers */);
/* vpmaxnmqd_f64: scalar FMAXNMP -- maxNum of the two float64 lanes of A,
   returned as a double.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13669 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13670 vpmaxnmqd_f64 (float64x2_t a
)
13673 __asm__ ("fmaxnmp %d0,%1.2d"
13676 : /* No clobbers */);
/* vpmaxnms_f32: scalar FMAXNMP -- maxNum of the two float32 lanes of A,
   returned as a float.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13680 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13681 vpmaxnms_f32 (float32x2_t a
)
13684 __asm__ ("fmaxnmp %s0,%1.2s"
13687 : /* No clobbers */);
/* vpmaxq_f32: FMAXP -- pairwise maximum of adjacent float32 lanes taken from
   A and B, returning float32x4_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
13691 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13692 vpmaxq_f32 (float32x4_t a
, float32x4_t b
)
13694 float32x4_t result
;
13695 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
13698 : /* No clobbers */);
/* vpmaxq_f64: FMAXP -- pairwise maximum of adjacent float64 lanes taken from
   A and B, returning float64x2_t.  (Body braces, asm operand lists and return
   are not visible in this excerpt.)  */
13702 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13703 vpmaxq_f64 (float64x2_t a
, float64x2_t b
)
13705 float64x2_t result
;
13706 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
13709 : /* No clobbers */);
/* vpmaxq_s8: SMAXP -- signed pairwise maximum of adjacent int8 lanes taken
   from A and B, returning int8x16_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13713 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13714 vpmaxq_s8 (int8x16_t a
, int8x16_t b
)
13717 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
13720 : /* No clobbers */);
/* vpmaxq_s16: SMAXP -- signed pairwise maximum of adjacent int16 lanes taken
   from A and B, returning int16x8_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13724 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13725 vpmaxq_s16 (int16x8_t a
, int16x8_t b
)
13728 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
13731 : /* No clobbers */);
/* vpmaxq_s32: SMAXP -- signed pairwise maximum of adjacent int32 lanes taken
   from A and B, returning int32x4_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13735 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13736 vpmaxq_s32 (int32x4_t a
, int32x4_t b
)
13739 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
13742 : /* No clobbers */);
/* vpmaxq_u8: UMAXP -- unsigned pairwise maximum of adjacent uint8 lanes taken
   from A and B, returning uint8x16_t.  (Body braces, asm operand lists and
   return are not visible in this excerpt.)  */
13746 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
13747 vpmaxq_u8 (uint8x16_t a
, uint8x16_t b
)
13750 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
13753 : /* No clobbers */);
/* vpmaxq_u16: UMAXP -- unsigned pairwise maximum of adjacent uint16 lanes
   taken from A and B, returning uint16x8_t.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13757 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
13758 vpmaxq_u16 (uint16x8_t a
, uint16x8_t b
)
13761 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
13764 : /* No clobbers */);
/* vpmaxq_u32: UMAXP -- unsigned pairwise maximum of adjacent uint32 lanes
   taken from A and B, returning uint32x4_t.  (Body braces, asm operand lists
   and return are not visible in this excerpt.)  */
13768 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
13769 vpmaxq_u32 (uint32x4_t a
, uint32x4_t b
)
13772 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
13775 : /* No clobbers */);
/* vpmaxqd_f64: scalar FMAXP -- maximum of the two float64 lanes of A,
   returned as a double.  (Body braces, asm operand lists and return are not
   visible in this excerpt.)  */
13779 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13780 vpmaxqd_f64 (float64x2_t a
)
13783 __asm__ ("fmaxp %d0,%1.2d"
13786 : /* No clobbers */);
/* vpmaxs_f32: scalar FMAXP -- maximum of the two float32 lanes of A, returned
   as a float.  (Body braces, asm operand lists and return are not visible in
   this excerpt.)  */
13790 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13791 vpmaxs_f32 (float32x2_t a
)
13794 __asm__ ("fmaxp %s0,%1.2s"
13797 : /* No clobbers */);
13801 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13802 vpmin_f32 (float32x2_t a
, float32x2_t b
)
13804 float32x2_t result
;
13805 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
13808 : /* No clobbers */);
13812 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
13813 vpmin_s8 (int8x8_t a
, int8x8_t b
)
13816 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
13819 : /* No clobbers */);
13823 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
13824 vpmin_s16 (int16x4_t a
, int16x4_t b
)
13827 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
13830 : /* No clobbers */);
13834 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
13835 vpmin_s32 (int32x2_t a
, int32x2_t b
)
13838 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
13841 : /* No clobbers */);
13845 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
13846 vpmin_u8 (uint8x8_t a
, uint8x8_t b
)
13849 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
13852 : /* No clobbers */);
13856 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
13857 vpmin_u16 (uint16x4_t a
, uint16x4_t b
)
13860 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
13863 : /* No clobbers */);
13867 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
13868 vpmin_u32 (uint32x2_t a
, uint32x2_t b
)
13871 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
13874 : /* No clobbers */);
13878 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
13879 vpminnm_f32 (float32x2_t a
, float32x2_t b
)
13881 float32x2_t result
;
13882 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
13885 : /* No clobbers */);
13889 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13890 vpminnmq_f32 (float32x4_t a
, float32x4_t b
)
13892 float32x4_t result
;
13893 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
13896 : /* No clobbers */);
13900 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13901 vpminnmq_f64 (float64x2_t a
, float64x2_t b
)
13903 float64x2_t result
;
13904 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
13907 : /* No clobbers */);
13911 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
13912 vpminnmqd_f64 (float64x2_t a
)
13915 __asm__ ("fminnmp %d0,%1.2d"
13918 : /* No clobbers */);
13922 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
13923 vpminnms_f32 (float32x2_t a
)
13926 __asm__ ("fminnmp %s0,%1.2s"
13929 : /* No clobbers */);
13933 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
13934 vpminq_f32 (float32x4_t a
, float32x4_t b
)
13936 float32x4_t result
;
13937 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
13940 : /* No clobbers */);
13944 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
13945 vpminq_f64 (float64x2_t a
, float64x2_t b
)
13947 float64x2_t result
;
13948 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
13951 : /* No clobbers */);
13955 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
13956 vpminq_s8 (int8x16_t a
, int8x16_t b
)
13959 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
13962 : /* No clobbers */);
13966 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
13967 vpminq_s16 (int16x8_t a
, int16x8_t b
)
13970 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
13973 : /* No clobbers */);
13977 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
13978 vpminq_s32 (int32x4_t a
, int32x4_t b
)
13981 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
13984 : /* No clobbers */);
13988 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
13989 vpminq_u8 (uint8x16_t a
, uint8x16_t b
)
13992 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
13995 : /* No clobbers */);
13999 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14000 vpminq_u16 (uint16x8_t a
, uint16x8_t b
)
14003 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
14006 : /* No clobbers */);
14010 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14011 vpminq_u32 (uint32x4_t a
, uint32x4_t b
)
14014 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
14017 : /* No clobbers */);
14021 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
14022 vpminqd_f64 (float64x2_t a
)
14025 __asm__ ("fminp %d0,%1.2d"
14028 : /* No clobbers */);
14032 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
14033 vpmins_f32 (float32x2_t a
)
14036 __asm__ ("fminp %s0,%1.2s"
14039 : /* No clobbers */);
14043 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14044 vqdmulh_n_s16 (int16x4_t a
, int16_t b
)
14047 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
14050 : /* No clobbers */);
14054 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14055 vqdmulh_n_s32 (int32x2_t a
, int32_t b
)
14058 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
14061 : /* No clobbers */);
14065 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14066 vqdmulhq_n_s16 (int16x8_t a
, int16_t b
)
14069 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
14072 : /* No clobbers */);
14076 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14077 vqdmulhq_n_s32 (int32x4_t a
, int32_t b
)
14080 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
14083 : /* No clobbers */);
14087 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14088 vqmovn_high_s16 (int8x8_t a
, int16x8_t b
)
14090 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
14091 __asm__ ("sqxtn2 %0.16b, %1.8h"
14094 : /* No clobbers */);
14098 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14099 vqmovn_high_s32 (int16x4_t a
, int32x4_t b
)
14101 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
14102 __asm__ ("sqxtn2 %0.8h, %1.4s"
14105 : /* No clobbers */);
14109 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14110 vqmovn_high_s64 (int32x2_t a
, int64x2_t b
)
14112 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
14113 __asm__ ("sqxtn2 %0.4s, %1.2d"
14116 : /* No clobbers */);
14120 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14121 vqmovn_high_u16 (uint8x8_t a
, uint16x8_t b
)
14123 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
14124 __asm__ ("uqxtn2 %0.16b, %1.8h"
14127 : /* No clobbers */);
14131 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14132 vqmovn_high_u32 (uint16x4_t a
, uint32x4_t b
)
14134 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
14135 __asm__ ("uqxtn2 %0.8h, %1.4s"
14138 : /* No clobbers */);
14142 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14143 vqmovn_high_u64 (uint32x2_t a
, uint64x2_t b
)
14145 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
14146 __asm__ ("uqxtn2 %0.4s, %1.2d"
14149 : /* No clobbers */);
14153 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14154 vqmovun_high_s16 (uint8x8_t a
, int16x8_t b
)
14156 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
14157 __asm__ ("sqxtun2 %0.16b, %1.8h"
14160 : /* No clobbers */);
14164 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14165 vqmovun_high_s32 (uint16x4_t a
, int32x4_t b
)
14167 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
14168 __asm__ ("sqxtun2 %0.8h, %1.4s"
14171 : /* No clobbers */);
14175 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14176 vqmovun_high_s64 (uint32x2_t a
, int64x2_t b
)
14178 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
14179 __asm__ ("sqxtun2 %0.4s, %1.2d"
14182 : /* No clobbers */);
14186 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14187 vqrdmulh_n_s16 (int16x4_t a
, int16_t b
)
14190 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
14193 : /* No clobbers */);
14197 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14198 vqrdmulh_n_s32 (int32x2_t a
, int32_t b
)
14201 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
14204 : /* No clobbers */);
14208 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14209 vqrdmulhq_n_s16 (int16x8_t a
, int16_t b
)
14212 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
14215 : /* No clobbers */);
14219 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14220 vqrdmulhq_n_s32 (int32x4_t a
, int32_t b
)
14223 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
14226 : /* No clobbers */);
/* SQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  A macro so that C reaches
   the "i" constraint as a constant expression.  */
#define vqrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 (UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  */
#define vqrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 (UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  */
#define vqrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 (UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  */
#define vqrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  */
#define vqrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQRSHRN2: A is the low half; B shifted right by immediate C (rounding,
   saturating, narrowing) fills the high half.  */
#define vqrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQRSHRUN2: A is the low half; signed B shifted right by immediate C
   (rounding, saturating to unsigned, narrowing) fills the high half.  */
#define vqrshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQRSHRUN2: A is the low half; signed B shifted right by immediate C
   (rounding, saturating to unsigned, narrowing) fills the high half.  */
#define vqrshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQRSHRUN2: A is the low half; signed B shifted right by immediate C
   (rounding, saturating to unsigned, narrowing) fills the high half.  */
#define vqrshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 (UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 (UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 (UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* UQSHRN2: A is the low half; B shifted right by immediate C (saturating,
   narrowing) fills the high half.  */
#define vqshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRUN2: A is the low half; signed B shifted right by immediate C
   (saturating to unsigned, narrowing) fills the high half.  */
#define vqshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRUN2: A is the low half; signed B shifted right by immediate C
   (saturating to unsigned, narrowing) fills the high half.  */
#define vqshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* SQSHRUN2: A is the low half; signed B shifted right by immediate C
   (saturating to unsigned, narrowing) fills the high half.  */
#define vqshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
14482 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14483 vrbit_s8 (int8x8_t a
)
14486 __asm__ ("rbit %0.8b,%1.8b"
14489 : /* No clobbers */);
14493 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14494 vrbit_u8 (uint8x8_t a
)
14497 __asm__ ("rbit %0.8b,%1.8b"
14500 : /* No clobbers */);
14504 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14505 vrbitq_s8 (int8x16_t a
)
14508 __asm__ ("rbit %0.16b,%1.16b"
14511 : /* No clobbers */);
14515 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14516 vrbitq_u8 (uint8x16_t a
)
14519 __asm__ ("rbit %0.16b,%1.16b"
14522 : /* No clobbers */);
14526 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
14527 vrecpe_u32 (uint32x2_t a
)
14530 __asm__ ("urecpe %0.2s,%1.2s"
14533 : /* No clobbers */);
14537 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14538 vrecpeq_u32 (uint32x4_t a
)
14541 __asm__ ("urecpe %0.4s,%1.4s"
14544 : /* No clobbers */);
14548 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
14549 vrev16_p8 (poly8x8_t a
)
14552 __asm__ ("rev16 %0.8b,%1.8b"
14555 : /* No clobbers */);
14559 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14560 vrev16_s8 (int8x8_t a
)
14563 __asm__ ("rev16 %0.8b,%1.8b"
14566 : /* No clobbers */);
14570 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14571 vrev16_u8 (uint8x8_t a
)
14574 __asm__ ("rev16 %0.8b,%1.8b"
14577 : /* No clobbers */);
14581 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
14582 vrev16q_p8 (poly8x16_t a
)
14585 __asm__ ("rev16 %0.16b,%1.16b"
14588 : /* No clobbers */);
14592 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14593 vrev16q_s8 (int8x16_t a
)
14596 __asm__ ("rev16 %0.16b,%1.16b"
14599 : /* No clobbers */);
14603 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14604 vrev16q_u8 (uint8x16_t a
)
14607 __asm__ ("rev16 %0.16b,%1.16b"
14610 : /* No clobbers */);
14614 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
14615 vrev32_p8 (poly8x8_t a
)
14618 __asm__ ("rev32 %0.8b,%1.8b"
14621 : /* No clobbers */);
14625 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
14626 vrev32_p16 (poly16x4_t a
)
14629 __asm__ ("rev32 %0.4h,%1.4h"
14632 : /* No clobbers */);
14636 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14637 vrev32_s8 (int8x8_t a
)
14640 __asm__ ("rev32 %0.8b,%1.8b"
14643 : /* No clobbers */);
14647 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14648 vrev32_s16 (int16x4_t a
)
14651 __asm__ ("rev32 %0.4h,%1.4h"
14654 : /* No clobbers */);
14658 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14659 vrev32_u8 (uint8x8_t a
)
14662 __asm__ ("rev32 %0.8b,%1.8b"
14665 : /* No clobbers */);
14669 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14670 vrev32_u16 (uint16x4_t a
)
14673 __asm__ ("rev32 %0.4h,%1.4h"
14676 : /* No clobbers */);
14680 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
14681 vrev32q_p8 (poly8x16_t a
)
14684 __asm__ ("rev32 %0.16b,%1.16b"
14687 : /* No clobbers */);
14691 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
14692 vrev32q_p16 (poly16x8_t a
)
14695 __asm__ ("rev32 %0.8h,%1.8h"
14698 : /* No clobbers */);
14702 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14703 vrev32q_s8 (int8x16_t a
)
14706 __asm__ ("rev32 %0.16b,%1.16b"
14709 : /* No clobbers */);
14713 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14714 vrev32q_s16 (int16x8_t a
)
14717 __asm__ ("rev32 %0.8h,%1.8h"
14720 : /* No clobbers */);
14724 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14725 vrev32q_u8 (uint8x16_t a
)
14728 __asm__ ("rev32 %0.16b,%1.16b"
14731 : /* No clobbers */);
14735 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14736 vrev32q_u16 (uint16x8_t a
)
14739 __asm__ ("rev32 %0.8h,%1.8h"
14742 : /* No clobbers */);
14746 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14747 vrev64_f32 (float32x2_t a
)
14749 float32x2_t result
;
14750 __asm__ ("rev64 %0.2s,%1.2s"
14753 : /* No clobbers */);
14757 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
14758 vrev64_p8 (poly8x8_t a
)
14761 __asm__ ("rev64 %0.8b,%1.8b"
14764 : /* No clobbers */);
14768 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
14769 vrev64_p16 (poly16x4_t a
)
14772 __asm__ ("rev64 %0.4h,%1.4h"
14775 : /* No clobbers */);
14779 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
14780 vrev64_s8 (int8x8_t a
)
14783 __asm__ ("rev64 %0.8b,%1.8b"
14786 : /* No clobbers */);
14790 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
14791 vrev64_s16 (int16x4_t a
)
14794 __asm__ ("rev64 %0.4h,%1.4h"
14797 : /* No clobbers */);
14801 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
14802 vrev64_s32 (int32x2_t a
)
14805 __asm__ ("rev64 %0.2s,%1.2s"
14808 : /* No clobbers */);
14812 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
14813 vrev64_u8 (uint8x8_t a
)
14816 __asm__ ("rev64 %0.8b,%1.8b"
14819 : /* No clobbers */);
14823 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
14824 vrev64_u16 (uint16x4_t a
)
14827 __asm__ ("rev64 %0.4h,%1.4h"
14830 : /* No clobbers */);
14834 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
14835 vrev64_u32 (uint32x2_t a
)
14838 __asm__ ("rev64 %0.2s,%1.2s"
14841 : /* No clobbers */);
14845 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
14846 vrev64q_f32 (float32x4_t a
)
14848 float32x4_t result
;
14849 __asm__ ("rev64 %0.4s,%1.4s"
14852 : /* No clobbers */);
14856 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
14857 vrev64q_p8 (poly8x16_t a
)
14860 __asm__ ("rev64 %0.16b,%1.16b"
14863 : /* No clobbers */);
14867 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
14868 vrev64q_p16 (poly16x8_t a
)
14871 __asm__ ("rev64 %0.8h,%1.8h"
14874 : /* No clobbers */);
14878 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
14879 vrev64q_s8 (int8x16_t a
)
14882 __asm__ ("rev64 %0.16b,%1.16b"
14885 : /* No clobbers */);
14889 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
14890 vrev64q_s16 (int16x8_t a
)
14893 __asm__ ("rev64 %0.8h,%1.8h"
14896 : /* No clobbers */);
14900 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
14901 vrev64q_s32 (int32x4_t a
)
14904 __asm__ ("rev64 %0.4s,%1.4s"
14907 : /* No clobbers */);
14911 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
14912 vrev64q_u8 (uint8x16_t a
)
14915 __asm__ ("rev64 %0.16b,%1.16b"
14918 : /* No clobbers */);
14922 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
14923 vrev64q_u16 (uint16x8_t a
)
14926 __asm__ ("rev64 %0.8h,%1.8h"
14929 : /* No clobbers */);
14933 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
14934 vrev64q_u32 (uint32x4_t a
)
14937 __asm__ ("rev64 %0.4s,%1.4s"
14940 : /* No clobbers */);
14944 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14945 vrnd_f32 (float32x2_t a
)
14947 float32x2_t result
;
14948 __asm__ ("frintz %0.2s,%1.2s"
14951 : /* No clobbers */);
14955 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14956 vrnda_f32 (float32x2_t a
)
14958 float32x2_t result
;
14959 __asm__ ("frinta %0.2s,%1.2s"
14962 : /* No clobbers */);
14966 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14967 vrndm_f32 (float32x2_t a
)
14969 float32x2_t result
;
14970 __asm__ ("frintm %0.2s,%1.2s"
14973 : /* No clobbers */);
14977 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14978 vrndn_f32 (float32x2_t a
)
14980 float32x2_t result
;
14981 __asm__ ("frintn %0.2s,%1.2s"
14984 : /* No clobbers */);
14988 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
14989 vrndp_f32 (float32x2_t a
)
14991 float32x2_t result
;
14992 __asm__ ("frintp %0.2s,%1.2s"
14995 : /* No clobbers */);
14999 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15000 vrndq_f32 (float32x4_t a
)
15002 float32x4_t result
;
15003 __asm__ ("frintz %0.4s,%1.4s"
15006 : /* No clobbers */);
15010 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15011 vrndq_f64 (float64x2_t a
)
15013 float64x2_t result
;
15014 __asm__ ("frintz %0.2d,%1.2d"
15017 : /* No clobbers */);
15021 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15022 vrndqa_f32 (float32x4_t a
)
15024 float32x4_t result
;
15025 __asm__ ("frinta %0.4s,%1.4s"
15028 : /* No clobbers */);
15032 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15033 vrndqa_f64 (float64x2_t a
)
15035 float64x2_t result
;
15036 __asm__ ("frinta %0.2d,%1.2d"
15039 : /* No clobbers */);
15043 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15044 vrndqm_f32 (float32x4_t a
)
15046 float32x4_t result
;
15047 __asm__ ("frintm %0.4s,%1.4s"
15050 : /* No clobbers */);
15054 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15055 vrndqm_f64 (float64x2_t a
)
15057 float64x2_t result
;
15058 __asm__ ("frintm %0.2d,%1.2d"
15061 : /* No clobbers */);
15065 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15066 vrndqn_f32 (float32x4_t a
)
15068 float32x4_t result
;
15069 __asm__ ("frintn %0.4s,%1.4s"
15072 : /* No clobbers */);
15076 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15077 vrndqn_f64 (float64x2_t a
)
15079 float64x2_t result
;
15080 __asm__ ("frintn %0.2d,%1.2d"
15083 : /* No clobbers */);
15087 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15088 vrndqp_f32 (float32x4_t a
)
15090 float32x4_t result
;
15091 __asm__ ("frintp %0.4s,%1.4s"
15094 : /* No clobbers */);
15098 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15099 vrndqp_f64 (float64x2_t a
)
15101 float64x2_t result
;
15102 __asm__ ("frintp %0.2d,%1.2d"
15105 : /* No clobbers */);
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN2: A is the low half; B shifted right by immediate C (rounding,
   narrowing) fills the high half.  RESULT is read-write ("+w").  */
#define vrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 16-bit lane of A right by immediate B with rounding
   and narrow to 8 bits.  */
#define vrshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 32-bit lane of A right by immediate B with rounding
   and narrow to 16 bits.  */
#define vrshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 64-bit lane of A right by immediate B with rounding
   and narrow to 32 bits.  */
#define vrshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 16-bit lane of A right by immediate B with rounding
   and narrow to 8 bits.  */
#define vrshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 32-bit lane of A right by immediate B with rounding
   and narrow to 16 bits.  */
#define vrshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* RSHRN: shift each 64-bit lane of A right by immediate B with rounding
   and narrow to 32 bits.  */
#define vrshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
15265 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
15266 vrsqrte_f32 (float32x2_t a
)
15268 float32x2_t result
;
15269 __asm__ ("frsqrte %0.2s,%1.2s"
15272 : /* No clobbers */);
15276 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15277 vrsqrte_f64 (float64x2_t a
)
15279 float64x2_t result
;
15280 __asm__ ("frsqrte %0.2d,%1.2d"
15283 : /* No clobbers */);
15287 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15288 vrsqrte_u32 (uint32x2_t a
)
15291 __asm__ ("ursqrte %0.2s,%1.2s"
15294 : /* No clobbers */);
15298 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
15299 vrsqrted_f64 (float64_t a
)
15302 __asm__ ("frsqrte %d0,%d1"
15305 : /* No clobbers */);
15309 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15310 vrsqrteq_f32 (float32x4_t a
)
15312 float32x4_t result
;
15313 __asm__ ("frsqrte %0.4s,%1.4s"
15316 : /* No clobbers */);
15320 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15321 vrsqrteq_f64 (float64x2_t a
)
15323 float64x2_t result
;
15324 __asm__ ("frsqrte %0.2d,%1.2d"
15327 : /* No clobbers */);
15331 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15332 vrsqrteq_u32 (uint32x4_t a
)
15335 __asm__ ("ursqrte %0.4s,%1.4s"
15338 : /* No clobbers */);
15342 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
15343 vrsqrtes_f32 (float32_t a
)
15346 __asm__ ("frsqrte %s0,%s1"
15349 : /* No clobbers */);
15353 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
15354 vrsqrts_f32 (float32x2_t a
, float32x2_t b
)
15356 float32x2_t result
;
15357 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
15360 : /* No clobbers */);
15364 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
15365 vrsqrtsd_f64 (float64_t a
, float64_t b
)
15368 __asm__ ("frsqrts %d0,%d1,%d2"
15371 : /* No clobbers */);
15375 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
15376 vrsqrtsq_f32 (float32x4_t a
, float32x4_t b
)
15378 float32x4_t result
;
15379 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
15382 : /* No clobbers */);
15386 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15387 vrsqrtsq_f64 (float64x2_t a
, float64x2_t b
)
15389 float64x2_t result
;
15390 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
15393 : /* No clobbers */);
15397 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
15398 vrsqrtss_f32 (float32_t a
, float32_t b
)
15401 __asm__ ("frsqrts %s0,%s1,%s2"
15404 : /* No clobbers */);
15408 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
15409 vrsrtsq_f64 (float64x2_t a
, float64x2_t b
)
15411 float64x2_t result
;
15412 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
15415 : /* No clobbers */);
15419 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
15420 vrsubhn_high_s16 (int8x8_t a
, int16x8_t b
, int16x8_t c
)
15422 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
15423 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15426 : /* No clobbers */);
15430 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
15431 vrsubhn_high_s32 (int16x4_t a
, int32x4_t b
, int32x4_t c
)
15433 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
15434 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15437 : /* No clobbers */);
15441 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
15442 vrsubhn_high_s64 (int32x2_t a
, int64x2_t b
, int64x2_t c
)
15444 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
15445 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15448 : /* No clobbers */);
15452 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
15453 vrsubhn_high_u16 (uint8x8_t a
, uint16x8_t b
, uint16x8_t c
)
15455 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
15456 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15459 : /* No clobbers */);
15463 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
15464 vrsubhn_high_u32 (uint16x4_t a
, uint32x4_t b
, uint32x4_t c
)
15466 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
15467 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15470 : /* No clobbers */);
/* vrsubhn_high_u64: RESULT starts as vcombine_u32 (A, all-zero vector); the
   asm template is RSUBHN2 with .2d sources and a .4s destination.  */
15474 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
15475 vrsubhn_high_u64 (uint32x2_t a
, uint64x2_t b
, uint64x2_t c
)
15477 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
15478 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15481 : /* No clobbers */);
/* vrsubhn_s16: int16x8_t A, B -> int8x8_t through one RSUBHN inline asm
   (.8h sources, .8b destination), no clobbers.  */
15485 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
15486 vrsubhn_s16 (int16x8_t a
, int16x8_t b
)
15489 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15492 : /* No clobbers */);
/* vrsubhn_s32: int32x4_t A, B -> int16x4_t through one RSUBHN inline asm
   (.4s sources, .4h destination), no clobbers.  */
15496 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
15497 vrsubhn_s32 (int32x4_t a
, int32x4_t b
)
15500 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15503 : /* No clobbers */);
/* vrsubhn_s64: int64x2_t A, B -> int32x2_t through one RSUBHN inline asm
   (.2d sources, .2s destination), no clobbers.  */
15507 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
15508 vrsubhn_s64 (int64x2_t a
, int64x2_t b
)
15511 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15514 : /* No clobbers */);
/* vrsubhn_u16: uint16x8_t A, B -> uint8x8_t through one RSUBHN inline asm
   (.8h sources, .8b destination), no clobbers.  */
15518 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
15519 vrsubhn_u16 (uint16x8_t a
, uint16x8_t b
)
15522 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15525 : /* No clobbers */);
/* vrsubhn_u32: uint32x4_t A, B -> uint16x4_t through one RSUBHN inline asm
   (.4s sources, .4h destination), no clobbers.  */
15529 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
15530 vrsubhn_u32 (uint32x4_t a
, uint32x4_t b
)
15533 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15536 : /* No clobbers */);
/* vrsubhn_u64: uint64x2_t A, B -> uint32x2_t through one RSUBHN inline asm
   (.2d sources, .2s destination), no clobbers.  */
15540 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
15541 vrsubhn_u64 (uint64x2_t a
, uint64x2_t b
)
15544 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15547 : /* No clobbers */);
/* vset_lane_f32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of a float32x2_t copy of B tied to operand 0.  */
15551 #define vset_lane_f32(a, b, c) \
15554 float32x2_t b_ = (b); \
15555 float32_t a_ = (a); \
15556 float32x2_t result; \
15557 __asm__ ("ins %0.s[%3], %w1" \
15559 : "r"(a_), "0"(b_), "i"(c) \
15560 : /* No clobbers */); \
/* vset_lane_f64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of a float64x1_t copy of B tied to operand 0.  */
15564 #define vset_lane_f64(a, b, c) \
15567 float64x1_t b_ = (b); \
15568 float64_t a_ = (a); \
15569 float64x1_t result; \
15570 __asm__ ("ins %0.d[%3], %x1" \
15572 : "r"(a_), "0"(b_), "i"(c) \
15573 : /* No clobbers */); \
/* vset_lane_p8 (a, b, c): INS of scalar A (general register, "r") into .b
   lane C (immediate) of a poly8x8_t copy of B tied to operand 0.  */
15577 #define vset_lane_p8(a, b, c) \
15580 poly8x8_t b_ = (b); \
15581 poly8_t a_ = (a); \
15582 poly8x8_t result; \
15583 __asm__ ("ins %0.b[%3], %w1" \
15585 : "r"(a_), "0"(b_), "i"(c) \
15586 : /* No clobbers */); \
/* vset_lane_p16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of a poly16x4_t copy of B tied to operand 0.  */
15590 #define vset_lane_p16(a, b, c) \
15593 poly16x4_t b_ = (b); \
15594 poly16_t a_ = (a); \
15595 poly16x4_t result; \
15596 __asm__ ("ins %0.h[%3], %w1" \
15598 : "r"(a_), "0"(b_), "i"(c) \
15599 : /* No clobbers */); \
/* vset_lane_s8 (a, b, c): INS into .b lane C (immediate) of an int8x8_t copy
   of B tied to operand 0.  NOTE(review): the declarations of a_ and result
   used by the asm are not visible in this listing -- confirm.  */
15603 #define vset_lane_s8(a, b, c) \
15606 int8x8_t b_ = (b); \
15609 __asm__ ("ins %0.b[%3], %w1" \
15611 : "r"(a_), "0"(b_), "i"(c) \
15612 : /* No clobbers */); \
/* vset_lane_s16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of an int16x4_t copy of B tied to operand 0.  */
15616 #define vset_lane_s16(a, b, c) \
15619 int16x4_t b_ = (b); \
15620 int16_t a_ = (a); \
15621 int16x4_t result; \
15622 __asm__ ("ins %0.h[%3], %w1" \
15624 : "r"(a_), "0"(b_), "i"(c) \
15625 : /* No clobbers */); \
/* vset_lane_s32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of an int32x2_t copy of B tied to operand 0.  */
15629 #define vset_lane_s32(a, b, c) \
15632 int32x2_t b_ = (b); \
15633 int32_t a_ = (a); \
15634 int32x2_t result; \
15635 __asm__ ("ins %0.s[%3], %w1" \
15637 : "r"(a_), "0"(b_), "i"(c) \
15638 : /* No clobbers */); \
/* vset_lane_s64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of an int64x1_t copy of B tied to operand 0.  */
15642 #define vset_lane_s64(a, b, c) \
15645 int64x1_t b_ = (b); \
15646 int64_t a_ = (a); \
15647 int64x1_t result; \
15648 __asm__ ("ins %0.d[%3], %x1" \
15650 : "r"(a_), "0"(b_), "i"(c) \
15651 : /* No clobbers */); \
/* vset_lane_u8 (a, b, c): INS of scalar A (general register, "r") into .b
   lane C (immediate) of a uint8x8_t copy of B tied to operand 0.  */
15655 #define vset_lane_u8(a, b, c) \
15658 uint8x8_t b_ = (b); \
15659 uint8_t a_ = (a); \
15660 uint8x8_t result; \
15661 __asm__ ("ins %0.b[%3], %w1" \
15663 : "r"(a_), "0"(b_), "i"(c) \
15664 : /* No clobbers */); \
/* vset_lane_u16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of a uint16x4_t copy of B tied to operand 0.  */
15668 #define vset_lane_u16(a, b, c) \
15671 uint16x4_t b_ = (b); \
15672 uint16_t a_ = (a); \
15673 uint16x4_t result; \
15674 __asm__ ("ins %0.h[%3], %w1" \
15676 : "r"(a_), "0"(b_), "i"(c) \
15677 : /* No clobbers */); \
/* vset_lane_u32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of a uint32x2_t copy of B tied to operand 0.  */
15681 #define vset_lane_u32(a, b, c) \
15684 uint32x2_t b_ = (b); \
15685 uint32_t a_ = (a); \
15686 uint32x2_t result; \
15687 __asm__ ("ins %0.s[%3], %w1" \
15689 : "r"(a_), "0"(b_), "i"(c) \
15690 : /* No clobbers */); \
/* vset_lane_u64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of a uint64x1_t copy of B tied to operand 0.  */
15694 #define vset_lane_u64(a, b, c) \
15697 uint64x1_t b_ = (b); \
15698 uint64_t a_ = (a); \
15699 uint64x1_t result; \
15700 __asm__ ("ins %0.d[%3], %x1" \
15702 : "r"(a_), "0"(b_), "i"(c) \
15703 : /* No clobbers */); \
/* vsetq_lane_f32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of a float32x4_t copy of B tied to operand 0.  */
15707 #define vsetq_lane_f32(a, b, c) \
15710 float32x4_t b_ = (b); \
15711 float32_t a_ = (a); \
15712 float32x4_t result; \
15713 __asm__ ("ins %0.s[%3], %w1" \
15715 : "r"(a_), "0"(b_), "i"(c) \
15716 : /* No clobbers */); \
/* vsetq_lane_f64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of a float64x2_t copy of B tied to operand 0.  */
15720 #define vsetq_lane_f64(a, b, c) \
15723 float64x2_t b_ = (b); \
15724 float64_t a_ = (a); \
15725 float64x2_t result; \
15726 __asm__ ("ins %0.d[%3], %x1" \
15728 : "r"(a_), "0"(b_), "i"(c) \
15729 : /* No clobbers */); \
/* vsetq_lane_p8 (a, b, c): INS of scalar A (general register, "r") into .b
   lane C (immediate) of a poly8x16_t copy of B tied to operand 0.  */
15733 #define vsetq_lane_p8(a, b, c) \
15736 poly8x16_t b_ = (b); \
15737 poly8_t a_ = (a); \
15738 poly8x16_t result; \
15739 __asm__ ("ins %0.b[%3], %w1" \
15741 : "r"(a_), "0"(b_), "i"(c) \
15742 : /* No clobbers */); \
/* vsetq_lane_p16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of a poly16x8_t copy of B tied to operand 0.  */
15746 #define vsetq_lane_p16(a, b, c) \
15749 poly16x8_t b_ = (b); \
15750 poly16_t a_ = (a); \
15751 poly16x8_t result; \
15752 __asm__ ("ins %0.h[%3], %w1" \
15754 : "r"(a_), "0"(b_), "i"(c) \
15755 : /* No clobbers */); \
/* vsetq_lane_s8 (a, b, c): INS into .b lane C (immediate) of an int8x16_t
   copy of B tied to operand 0.  NOTE(review): the declaration of a_ used by
   the asm is not visible in this listing -- confirm.  */
15759 #define vsetq_lane_s8(a, b, c) \
15762 int8x16_t b_ = (b); \
15764 int8x16_t result; \
15765 __asm__ ("ins %0.b[%3], %w1" \
15767 : "r"(a_), "0"(b_), "i"(c) \
15768 : /* No clobbers */); \
/* vsetq_lane_s16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of an int16x8_t copy of B tied to operand 0.  */
15772 #define vsetq_lane_s16(a, b, c) \
15775 int16x8_t b_ = (b); \
15776 int16_t a_ = (a); \
15777 int16x8_t result; \
15778 __asm__ ("ins %0.h[%3], %w1" \
15780 : "r"(a_), "0"(b_), "i"(c) \
15781 : /* No clobbers */); \
/* vsetq_lane_s32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of an int32x4_t copy of B tied to operand 0.  */
15785 #define vsetq_lane_s32(a, b, c) \
15788 int32x4_t b_ = (b); \
15789 int32_t a_ = (a); \
15790 int32x4_t result; \
15791 __asm__ ("ins %0.s[%3], %w1" \
15793 : "r"(a_), "0"(b_), "i"(c) \
15794 : /* No clobbers */); \
/* vsetq_lane_s64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of an int64x2_t copy of B tied to operand 0.  */
15798 #define vsetq_lane_s64(a, b, c) \
15801 int64x2_t b_ = (b); \
15802 int64_t a_ = (a); \
15803 int64x2_t result; \
15804 __asm__ ("ins %0.d[%3], %x1" \
15806 : "r"(a_), "0"(b_), "i"(c) \
15807 : /* No clobbers */); \
/* vsetq_lane_u8 (a, b, c): INS of scalar A (general register, "r") into .b
   lane C (immediate) of a uint8x16_t copy of B tied to operand 0.  */
15811 #define vsetq_lane_u8(a, b, c) \
15814 uint8x16_t b_ = (b); \
15815 uint8_t a_ = (a); \
15816 uint8x16_t result; \
15817 __asm__ ("ins %0.b[%3], %w1" \
15819 : "r"(a_), "0"(b_), "i"(c) \
15820 : /* No clobbers */); \
/* vsetq_lane_u16 (a, b, c): INS of scalar A (general register, "r") into .h
   lane C (immediate) of a uint16x8_t copy of B tied to operand 0.  */
15824 #define vsetq_lane_u16(a, b, c) \
15827 uint16x8_t b_ = (b); \
15828 uint16_t a_ = (a); \
15829 uint16x8_t result; \
15830 __asm__ ("ins %0.h[%3], %w1" \
15832 : "r"(a_), "0"(b_), "i"(c) \
15833 : /* No clobbers */); \
/* vsetq_lane_u32 (a, b, c): INS of scalar A (general register, "r") into .s
   lane C (immediate) of a uint32x4_t copy of B tied to operand 0.  */
15837 #define vsetq_lane_u32(a, b, c) \
15840 uint32x4_t b_ = (b); \
15841 uint32_t a_ = (a); \
15842 uint32x4_t result; \
15843 __asm__ ("ins %0.s[%3], %w1" \
15845 : "r"(a_), "0"(b_), "i"(c) \
15846 : /* No clobbers */); \
/* vsetq_lane_u64 (a, b, c): INS of scalar A (general register, %x form) into
   .d lane C (immediate) of a uint64x2_t copy of B tied to operand 0.  */
15850 #define vsetq_lane_u64(a, b, c) \
15853 uint64x2_t b_ = (b); \
15854 uint64_t a_ = (a); \
15855 uint64x2_t result; \
15856 __asm__ ("ins %0.d[%3], %x1" \
15858 : "r"(a_), "0"(b_), "i"(c) \
15859 : /* No clobbers */); \
/* vshrn_high_n_s16 (a, b, c): RESULT starts as vcombine_s8 (A, all-zero
   vector); SHRN2 then takes B ("w", .8h) and the immediate shift C ("i").  */
15863 #define vshrn_high_n_s16(a, b, c) \
15866 int16x8_t b_ = (b); \
15867 int8x8_t a_ = (a); \
15868 int8x16_t result = vcombine_s8 \
15869 (a_, vcreate_s8 (UINT64_C (0x0))); \
15870 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
15872 : "w"(b_), "i"(c) \
15873 : /* No clobbers */); \
/* vshrn_high_n_s32 (a, b, c): RESULT starts as vcombine_s16 (A, all-zero
   vector); SHRN2 then takes B ("w", .4s) and the immediate shift C ("i").  */
15877 #define vshrn_high_n_s32(a, b, c) \
15880 int32x4_t b_ = (b); \
15881 int16x4_t a_ = (a); \
15882 int16x8_t result = vcombine_s16 \
15883 (a_, vcreate_s16 (UINT64_C (0x0))); \
15884 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
15886 : "w"(b_), "i"(c) \
15887 : /* No clobbers */); \
/* vshrn_high_n_s64 (a, b, c): RESULT starts as vcombine_s32 (A, all-zero
   vector); SHRN2 then takes B ("w", .2d) and the immediate shift C ("i").  */
15891 #define vshrn_high_n_s64(a, b, c) \
15894 int64x2_t b_ = (b); \
15895 int32x2_t a_ = (a); \
15896 int32x4_t result = vcombine_s32 \
15897 (a_, vcreate_s32 (UINT64_C (0x0))); \
15898 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
15900 : "w"(b_), "i"(c) \
15901 : /* No clobbers */); \
/* vshrn_high_n_u16 (a, b, c): RESULT starts as vcombine_u8 (A, all-zero
   vector); SHRN2 then takes B ("w", .8h) and the immediate shift C ("i").  */
15905 #define vshrn_high_n_u16(a, b, c) \
15908 uint16x8_t b_ = (b); \
15909 uint8x8_t a_ = (a); \
15910 uint8x16_t result = vcombine_u8 \
15911 (a_, vcreate_u8 (UINT64_C (0x0))); \
15912 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
15914 : "w"(b_), "i"(c) \
15915 : /* No clobbers */); \
/* vshrn_high_n_u32 (a, b, c): RESULT starts as vcombine_u16 (A, all-zero
   vector); SHRN2 then takes B ("w", .4s) and the immediate shift C ("i").  */
15919 #define vshrn_high_n_u32(a, b, c) \
15922 uint32x4_t b_ = (b); \
15923 uint16x4_t a_ = (a); \
15924 uint16x8_t result = vcombine_u16 \
15925 (a_, vcreate_u16 (UINT64_C (0x0))); \
15926 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
15928 : "w"(b_), "i"(c) \
15929 : /* No clobbers */); \
/* vshrn_high_n_u64 (a, b, c): RESULT starts as vcombine_u32 (A, all-zero
   vector); SHRN2 then takes B ("w", .2d) and the immediate shift C ("i").  */
15933 #define vshrn_high_n_u64(a, b, c) \
15936 uint64x2_t b_ = (b); \
15937 uint32x2_t a_ = (a); \
15938 uint32x4_t result = vcombine_u32 \
15939 (a_, vcreate_u32 (UINT64_C (0x0))); \
15940 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
15942 : "w"(b_), "i"(c) \
15943 : /* No clobbers */); \
/* vshrn_n_s16 (a, b): SHRN from the .8h form of A ("w") by immediate B ("i")
   into an .8b destination.  NOTE(review): no declaration of the result
   variable is visible in this listing -- confirm.  */
15947 #define vshrn_n_s16(a, b) \
15950 int16x8_t a_ = (a); \
15952 __asm__ ("shrn %0.8b,%1.8h,%2" \
15954 : "w"(a_), "i"(b) \
15955 : /* No clobbers */); \
/* vshrn_n_s32 (a, b): SHRN from the .4s form of A ("w") by immediate B ("i")
   into the .4h form of an int16x4_t result.  */
15959 #define vshrn_n_s32(a, b) \
15962 int32x4_t a_ = (a); \
15963 int16x4_t result; \
15964 __asm__ ("shrn %0.4h,%1.4s,%2" \
15966 : "w"(a_), "i"(b) \
15967 : /* No clobbers */); \
/* vshrn_n_s64 (a, b): SHRN from the .2d form of A ("w") by immediate B ("i")
   into the .2s form of an int32x2_t result.  */
15971 #define vshrn_n_s64(a, b) \
15974 int64x2_t a_ = (a); \
15975 int32x2_t result; \
15976 __asm__ ("shrn %0.2s,%1.2d,%2" \
15978 : "w"(a_), "i"(b) \
15979 : /* No clobbers */); \
/* vshrn_n_u16 (a, b): SHRN from the .8h form of A ("w") by immediate B ("i")
   into the .8b form of a uint8x8_t result.  */
15983 #define vshrn_n_u16(a, b) \
15986 uint16x8_t a_ = (a); \
15987 uint8x8_t result; \
15988 __asm__ ("shrn %0.8b,%1.8h,%2" \
15990 : "w"(a_), "i"(b) \
15991 : /* No clobbers */); \
/* vshrn_n_u32 (a, b): SHRN from the .4s form of A ("w") by immediate B ("i")
   into the .4h form of a uint16x4_t result.  */
15995 #define vshrn_n_u32(a, b) \
15998 uint32x4_t a_ = (a); \
15999 uint16x4_t result; \
16000 __asm__ ("shrn %0.4h,%1.4s,%2" \
16002 : "w"(a_), "i"(b) \
16003 : /* No clobbers */); \
/* vshrn_n_u64 (a, b): SHRN from the .2d form of A ("w") by immediate B ("i")
   into the .2s form of a uint32x2_t result.  */
16007 #define vshrn_n_u64(a, b) \
16010 uint64x2_t a_ = (a); \
16011 uint32x2_t result; \
16012 __asm__ ("shrn %0.2s,%1.2d,%2" \
16014 : "w"(a_), "i"(b) \
16015 : /* No clobbers */); \
/* vsli_n_p8 (a, b, c): SLI (.8b) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16019 #define vsli_n_p8(a, b, c) \
16022 poly8x8_t b_ = (b); \
16023 poly8x8_t a_ = (a); \
16024 poly8x8_t result; \
16025 __asm__ ("sli %0.8b,%2.8b,%3" \
16027 : "0"(a_), "w"(b_), "i"(c) \
16028 : /* No clobbers */); \
/* vsli_n_p16 (a, b, c): SLI (.4h) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16032 #define vsli_n_p16(a, b, c) \
16035 poly16x4_t b_ = (b); \
16036 poly16x4_t a_ = (a); \
16037 poly16x4_t result; \
16038 __asm__ ("sli %0.4h,%2.4h,%3" \
16040 : "0"(a_), "w"(b_), "i"(c) \
16041 : /* No clobbers */); \
/* vsliq_n_p8 (a, b, c): SLI (.16b) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16045 #define vsliq_n_p8(a, b, c) \
16048 poly8x16_t b_ = (b); \
16049 poly8x16_t a_ = (a); \
16050 poly8x16_t result; \
16051 __asm__ ("sli %0.16b,%2.16b,%3" \
16053 : "0"(a_), "w"(b_), "i"(c) \
16054 : /* No clobbers */); \
/* vsliq_n_p16 (a, b, c): SLI (.8h) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16058 #define vsliq_n_p16(a, b, c) \
16061 poly16x8_t b_ = (b); \
16062 poly16x8_t a_ = (a); \
16063 poly16x8_t result; \
16064 __asm__ ("sli %0.8h,%2.8h,%3" \
16066 : "0"(a_), "w"(b_), "i"(c) \
16067 : /* No clobbers */); \
/* vsri_n_p8 (a, b, c): SRI (.8b) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16071 #define vsri_n_p8(a, b, c) \
16074 poly8x8_t b_ = (b); \
16075 poly8x8_t a_ = (a); \
16076 poly8x8_t result; \
16077 __asm__ ("sri %0.8b,%2.8b,%3" \
16079 : "0"(a_), "w"(b_), "i"(c) \
16080 : /* No clobbers */); \
/* vsri_n_p16 (a, b, c): SRI (.4h) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16084 #define vsri_n_p16(a, b, c) \
16087 poly16x4_t b_ = (b); \
16088 poly16x4_t a_ = (a); \
16089 poly16x4_t result; \
16090 __asm__ ("sri %0.4h,%2.4h,%3" \
16092 : "0"(a_), "w"(b_), "i"(c) \
16093 : /* No clobbers */); \
/* vsriq_n_p8 (a, b, c): SRI (.16b) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16097 #define vsriq_n_p8(a, b, c) \
16100 poly8x16_t b_ = (b); \
16101 poly8x16_t a_ = (a); \
16102 poly8x16_t result; \
16103 __asm__ ("sri %0.16b,%2.16b,%3" \
16105 : "0"(a_), "w"(b_), "i"(c) \
16106 : /* No clobbers */); \
/* vsriq_n_p16 (a, b, c): SRI (.8h) with A tied to operand 0, B as the vector
   source ("w") and C as the immediate ("i").  */
16110 #define vsriq_n_p16(a, b, c) \
16113 poly16x8_t b_ = (b); \
16114 poly16x8_t a_ = (a); \
16115 poly16x8_t result; \
16116 __asm__ ("sri %0.8h,%2.8h,%3" \
16118 : "0"(a_), "w"(b_), "i"(c) \
16119 : /* No clobbers */); \
/* vst1_f32: void (float32_t *A, float32x2_t B).  The asm template is an ST1
   of one .2s register to the address in operand %0.  */
16123 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16124 vst1_f32 (float32_t
* a
, float32x2_t b
)
16126 __asm__ ("st1 {%1.2s},[%0]"
/* vst1_f64: void (float64_t *A, float64x1_t B).  The asm template is an ST1
   of one .1d register to the address in operand %0.  */
16132 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16133 vst1_f64 (float64_t
* a
, float64x1_t b
)
16135 __asm__ ("st1 {%1.1d},[%0]"
/* vst1_lane_f32 (a, b, c): ST1 of .s lane C (immediate) of float32x2_t B
   ("w") to the address held in pointer A ("r").  */
16141 #define vst1_lane_f32(a, b, c) \
16144 float32x2_t b_ = (b); \
16145 float32_t * a_ = (a); \
16146 __asm__ ("st1 {%1.s}[%2],[%0]" \
16148 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_f64 (a, b, c): ST1 of .d lane C (immediate) of float64x1_t B
   ("w") to the address held in pointer A ("r").  */
16152 #define vst1_lane_f64(a, b, c) \
16155 float64x1_t b_ = (b); \
16156 float64_t * a_ = (a); \
16157 __asm__ ("st1 {%1.d}[%2],[%0]" \
16159 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_p8 (a, b, c): ST1 of .b lane C (immediate) of poly8x8_t B ("w")
   to the address held in pointer A ("r").  */
16163 #define vst1_lane_p8(a, b, c) \
16166 poly8x8_t b_ = (b); \
16167 poly8_t * a_ = (a); \
16168 __asm__ ("st1 {%1.b}[%2],[%0]" \
16170 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_p16 (a, b, c): ST1 of .h lane C (immediate) of poly16x4_t B
   ("w") to the address held in pointer A ("r").  */
16174 #define vst1_lane_p16(a, b, c) \
16177 poly16x4_t b_ = (b); \
16178 poly16_t * a_ = (a); \
16179 __asm__ ("st1 {%1.h}[%2],[%0]" \
16181 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_s8 (a, b, c): ST1 of .b lane C (immediate) of int8x8_t B ("w")
   to the address held in pointer A ("r").  */
16185 #define vst1_lane_s8(a, b, c) \
16188 int8x8_t b_ = (b); \
16189 int8_t * a_ = (a); \
16190 __asm__ ("st1 {%1.b}[%2],[%0]" \
16192 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_s16 (a, b, c): ST1 of .h lane C (immediate) of int16x4_t B ("w")
   to the address held in pointer A ("r").  */
16196 #define vst1_lane_s16(a, b, c) \
16199 int16x4_t b_ = (b); \
16200 int16_t * a_ = (a); \
16201 __asm__ ("st1 {%1.h}[%2],[%0]" \
16203 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_s32 (a, b, c): ST1 of .s lane C (immediate) of int32x2_t B ("w")
   to the address held in pointer A ("r").  */
16207 #define vst1_lane_s32(a, b, c) \
16210 int32x2_t b_ = (b); \
16211 int32_t * a_ = (a); \
16212 __asm__ ("st1 {%1.s}[%2],[%0]" \
16214 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_s64 (a, b, c): ST1 of .d lane C (immediate) of int64x1_t B ("w")
   to the address held in pointer A ("r").  */
16218 #define vst1_lane_s64(a, b, c) \
16221 int64x1_t b_ = (b); \
16222 int64_t * a_ = (a); \
16223 __asm__ ("st1 {%1.d}[%2],[%0]" \
16225 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_u8 (a, b, c): ST1 of .b lane C (immediate) of uint8x8_t B ("w")
   to the address held in pointer A ("r").  */
16229 #define vst1_lane_u8(a, b, c) \
16232 uint8x8_t b_ = (b); \
16233 uint8_t * a_ = (a); \
16234 __asm__ ("st1 {%1.b}[%2],[%0]" \
16236 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_u16 (a, b, c): ST1 of .h lane C (immediate) of uint16x4_t B
   ("w") to the address held in pointer A ("r").  */
16240 #define vst1_lane_u16(a, b, c) \
16243 uint16x4_t b_ = (b); \
16244 uint16_t * a_ = (a); \
16245 __asm__ ("st1 {%1.h}[%2],[%0]" \
16247 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_u32 (a, b, c): ST1 of .s lane C (immediate) of uint32x2_t B
   ("w") to the address held in pointer A ("r").  */
16251 #define vst1_lane_u32(a, b, c) \
16254 uint32x2_t b_ = (b); \
16255 uint32_t * a_ = (a); \
16256 __asm__ ("st1 {%1.s}[%2],[%0]" \
16258 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_lane_u64 (a, b, c): ST1 of .d lane C (immediate) of uint64x1_t B
   ("w") to the address held in pointer A ("r").  */
16262 #define vst1_lane_u64(a, b, c) \
16265 uint64x1_t b_ = (b); \
16266 uint64_t * a_ = (a); \
16267 __asm__ ("st1 {%1.d}[%2],[%0]" \
16269 : "r"(a_), "w"(b_), "i"(c) \
/* vst1_p8: void (poly8_t *A, poly8x8_t B).  The asm template is an ST1 of
   one .8b register to the address in operand %0.  */
16273 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16274 vst1_p8 (poly8_t
* a
, poly8x8_t b
)
16276 __asm__ ("st1 {%1.8b},[%0]"
/* vst1_p16: void (poly16_t *A, poly16x4_t B).  The asm template is an ST1 of
   one .4h register to the address in operand %0.  */
16282 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16283 vst1_p16 (poly16_t
* a
, poly16x4_t b
)
16285 __asm__ ("st1 {%1.4h},[%0]"
/* vst1_s8: void (int8_t *A, int8x8_t B).  The asm template is an ST1 of one
   .8b register to the address in operand %0.  */
16291 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16292 vst1_s8 (int8_t * a
, int8x8_t b
)
16294 __asm__ ("st1 {%1.8b},[%0]"
/* vst1_s16: void (int16_t *A, int16x4_t B).  The asm template is an ST1 of
   one .4h register to the address in operand %0.  */
16300 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16301 vst1_s16 (int16_t * a
, int16x4_t b
)
16303 __asm__ ("st1 {%1.4h},[%0]"
/* vst1_s32: void (int32_t *A, int32x2_t B).  The asm template is an ST1 of
   one .2s register to the address in operand %0.  */
16309 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16310 vst1_s32 (int32_t * a
, int32x2_t b
)
16312 __asm__ ("st1 {%1.2s},[%0]"
/* vst1_s64: void (int64_t *A, int64x1_t B).  The asm template is an ST1 of
   one .1d register to the address in operand %0.  */
16318 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16319 vst1_s64 (int64_t * a
, int64x1_t b
)
16321 __asm__ ("st1 {%1.1d},[%0]"
/* vst1_u8: void (uint8_t *A, uint8x8_t B).  The asm template is an ST1 of
   one .8b register to the address in operand %0.  */
16327 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16328 vst1_u8 (uint8_t * a
, uint8x8_t b
)
16330 __asm__ ("st1 {%1.8b},[%0]"
/* vst1_u16: void (uint16_t *A, uint16x4_t B).  The asm template is an ST1 of
   one .4h register to the address in operand %0.  */
16336 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16337 vst1_u16 (uint16_t * a
, uint16x4_t b
)
16339 __asm__ ("st1 {%1.4h},[%0]"
/* vst1_u32: void (uint32_t *A, uint32x2_t B).  The asm template is an ST1 of
   one .2s register to the address in operand %0.  */
16345 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16346 vst1_u32 (uint32_t * a
, uint32x2_t b
)
16348 __asm__ ("st1 {%1.2s},[%0]"
/* vst1_u64: void (uint64_t *A, uint64x1_t B).  The asm template is an ST1 of
   one .1d register to the address in operand %0.  */
16354 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16355 vst1_u64 (uint64_t * a
, uint64x1_t b
)
16357 __asm__ ("st1 {%1.1d},[%0]"
/* vst1q_f32: void (float32_t *A, float32x4_t B).  The asm template is an ST1
   of one .4s register to the address in operand %0.  */
16363 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16364 vst1q_f32 (float32_t
* a
, float32x4_t b
)
16366 __asm__ ("st1 {%1.4s},[%0]"
/* vst1q_f64: void (float64_t *A, float64x2_t B).  The asm template is an ST1
   of one .2d register to the address in operand %0.  */
16372 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16373 vst1q_f64 (float64_t
* a
, float64x2_t b
)
16375 __asm__ ("st1 {%1.2d},[%0]"
/* vst1q_lane_f32 (a, b, c): ST1 of .s lane C (immediate) of float32x4_t B
   ("w") to the address held in pointer A ("r").  */
16381 #define vst1q_lane_f32(a, b, c) \
16384 float32x4_t b_ = (b); \
16385 float32_t * a_ = (a); \
16386 __asm__ ("st1 {%1.s}[%2],[%0]" \
16388 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_f64 (a, b, c): ST1 of .d lane C (immediate) of float64x2_t B
   ("w") to the address held in pointer A ("r").  */
16392 #define vst1q_lane_f64(a, b, c) \
16395 float64x2_t b_ = (b); \
16396 float64_t * a_ = (a); \
16397 __asm__ ("st1 {%1.d}[%2],[%0]" \
16399 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_p8 (a, b, c): ST1 of .b lane C (immediate) of poly8x16_t B
   ("w") to the address held in pointer A ("r").  */
16403 #define vst1q_lane_p8(a, b, c) \
16406 poly8x16_t b_ = (b); \
16407 poly8_t * a_ = (a); \
16408 __asm__ ("st1 {%1.b}[%2],[%0]" \
16410 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_p16 (a, b, c): ST1 of .h lane C (immediate) of poly16x8_t B
   ("w") to the address held in pointer A ("r").  */
16414 #define vst1q_lane_p16(a, b, c) \
16417 poly16x8_t b_ = (b); \
16418 poly16_t * a_ = (a); \
16419 __asm__ ("st1 {%1.h}[%2],[%0]" \
16421 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_s8 (a, b, c): ST1 of .b lane C (immediate) of int8x16_t B ("w")
   to the address held in pointer A ("r").  */
16425 #define vst1q_lane_s8(a, b, c) \
16428 int8x16_t b_ = (b); \
16429 int8_t * a_ = (a); \
16430 __asm__ ("st1 {%1.b}[%2],[%0]" \
16432 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_s16 (a, b, c): ST1 of .h lane C (immediate) of int16x8_t B
   ("w") to the address held in pointer A ("r").  */
16436 #define vst1q_lane_s16(a, b, c) \
16439 int16x8_t b_ = (b); \
16440 int16_t * a_ = (a); \
16441 __asm__ ("st1 {%1.h}[%2],[%0]" \
16443 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_s32 (a, b, c): ST1 of .s lane C (immediate) of int32x4_t B
   ("w") to the address held in pointer A ("r").  */
16447 #define vst1q_lane_s32(a, b, c) \
16450 int32x4_t b_ = (b); \
16451 int32_t * a_ = (a); \
16452 __asm__ ("st1 {%1.s}[%2],[%0]" \
16454 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_s64 (a, b, c): ST1 of .d lane C (immediate) of int64x2_t B
   ("w") to the address held in pointer A ("r").  */
16458 #define vst1q_lane_s64(a, b, c) \
16461 int64x2_t b_ = (b); \
16462 int64_t * a_ = (a); \
16463 __asm__ ("st1 {%1.d}[%2],[%0]" \
16465 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_u8 (a, b, c): ST1 of .b lane C (immediate) of uint8x16_t B
   ("w") to the address held in pointer A ("r").  */
16469 #define vst1q_lane_u8(a, b, c) \
16472 uint8x16_t b_ = (b); \
16473 uint8_t * a_ = (a); \
16474 __asm__ ("st1 {%1.b}[%2],[%0]" \
16476 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_u16 (a, b, c): ST1 of .h lane C (immediate) of uint16x8_t B
   ("w") to the address held in pointer A ("r").  */
16480 #define vst1q_lane_u16(a, b, c) \
16483 uint16x8_t b_ = (b); \
16484 uint16_t * a_ = (a); \
16485 __asm__ ("st1 {%1.h}[%2],[%0]" \
16487 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_u32 (a, b, c): ST1 of .s lane C (immediate) of uint32x4_t B
   ("w") to the address held in pointer A ("r").  */
16491 #define vst1q_lane_u32(a, b, c) \
16494 uint32x4_t b_ = (b); \
16495 uint32_t * a_ = (a); \
16496 __asm__ ("st1 {%1.s}[%2],[%0]" \
16498 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_lane_u64 (a, b, c): ST1 of .d lane C (immediate) of uint64x2_t B
   ("w") to the address held in pointer A ("r").  */
16502 #define vst1q_lane_u64(a, b, c) \
16505 uint64x2_t b_ = (b); \
16506 uint64_t * a_ = (a); \
16507 __asm__ ("st1 {%1.d}[%2],[%0]" \
16509 : "r"(a_), "w"(b_), "i"(c) \
/* vst1q_p8: void (poly8_t *A, poly8x16_t B).  The asm template is an ST1 of
   one .16b register to the address in operand %0.  */
16513 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16514 vst1q_p8 (poly8_t
* a
, poly8x16_t b
)
16516 __asm__ ("st1 {%1.16b},[%0]"
/* vst1q_p16: void (poly16_t *A, poly16x8_t B).  The asm template is an ST1
   of one .8h register to the address in operand %0.  */
16522 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16523 vst1q_p16 (poly16_t
* a
, poly16x8_t b
)
16525 __asm__ ("st1 {%1.8h},[%0]"
/* vst1q_s8: void (int8_t *A, int8x16_t B).  The asm template is an ST1 of
   one .16b register to the address in operand %0.  */
16531 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16532 vst1q_s8 (int8_t * a
, int8x16_t b
)
16534 __asm__ ("st1 {%1.16b},[%0]"
/* vst1q_s16: void (int16_t *A, int16x8_t B).  The asm template is an ST1 of
   one .8h register to the address in operand %0.  */
16540 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16541 vst1q_s16 (int16_t * a
, int16x8_t b
)
16543 __asm__ ("st1 {%1.8h},[%0]"
/* vst1q_s32: void (int32_t *A, int32x4_t B).  The asm template is an ST1 of
   one .4s register to the address in operand %0.  */
16549 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16550 vst1q_s32 (int32_t * a
, int32x4_t b
)
16552 __asm__ ("st1 {%1.4s},[%0]"
/* vst1q_s64: void (int64_t *A, int64x2_t B).  The asm template is an ST1 of
   one .2d register to the address in operand %0.  */
16558 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16559 vst1q_s64 (int64_t * a
, int64x2_t b
)
16561 __asm__ ("st1 {%1.2d},[%0]"
/* vst1q_u8: void (uint8_t *A, uint8x16_t B).  The asm template is an ST1 of
   one .16b register to the address in operand %0.  */
16567 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16568 vst1q_u8 (uint8_t * a
, uint8x16_t b
)
16570 __asm__ ("st1 {%1.16b},[%0]"
/* vst1q_u16: void (uint16_t *A, uint16x8_t B).  The asm template is an ST1
   of one .8h register to the address in operand %0.  */
16576 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16577 vst1q_u16 (uint16_t * a
, uint16x8_t b
)
16579 __asm__ ("st1 {%1.8h},[%0]"
/* vst1q_u32: void (uint32_t *A, uint32x4_t B).  The asm template is an ST1
   of one .4s register to the address in operand %0.  */
16585 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16586 vst1q_u32 (uint32_t * a
, uint32x4_t b
)
16588 __asm__ ("st1 {%1.4s},[%0]"
/* vst1q_u64: void (uint64_t *A, uint64x2_t B).  The asm template is an ST1
   of one .2d register to the address in operand %0.  */
16594 __extension__
static __inline
void __attribute__ ((__always_inline__
))
16595 vst1q_u64 (uint64_t * a
, uint64x2_t b
)
16597 __asm__ ("st1 {%1.2d},[%0]"
/* vsubhn_high_s16: RESULT starts as vcombine_s8 (A, all-zero vector); the
   asm template is SUBHN2 with .8h sources and a .16b destination.  */
16603 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16604 vsubhn_high_s16 (int8x8_t a
, int16x8_t b
, int16x8_t c
)
16606 int8x16_t result
= vcombine_s8 (a
, vcreate_s8 (UINT64_C (0x0)));
16607 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
16610 : /* No clobbers */);
/* vsubhn_high_s32: RESULT starts as vcombine_s16 (A, all-zero vector); the
   asm template is SUBHN2 with .4s sources and a .8h destination.  */
16614 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
16615 vsubhn_high_s32 (int16x4_t a
, int32x4_t b
, int32x4_t c
)
16617 int16x8_t result
= vcombine_s16 (a
, vcreate_s16 (UINT64_C (0x0)));
16618 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
16621 : /* No clobbers */);
/* vsubhn_high_s64: RESULT starts as vcombine_s32 (A, all-zero vector); the
   asm template is SUBHN2 with .2d sources and a .4s destination.  */
16625 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
16626 vsubhn_high_s64 (int32x2_t a
, int64x2_t b
, int64x2_t c
)
16628 int32x4_t result
= vcombine_s32 (a
, vcreate_s32 (UINT64_C (0x0)));
16629 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
16632 : /* No clobbers */);
/* vsubhn_high_u16: RESULT starts as vcombine_u8 (A, all-zero vector); the
   asm template is SUBHN2 with .8h sources and a .16b destination.  */
16636 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16637 vsubhn_high_u16 (uint8x8_t a
, uint16x8_t b
, uint16x8_t c
)
16639 uint8x16_t result
= vcombine_u8 (a
, vcreate_u8 (UINT64_C (0x0)));
16640 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
16643 : /* No clobbers */);
/* vsubhn_high_u32: RESULT starts as vcombine_u16 (A, all-zero vector); the
   asm template is SUBHN2 with .4s sources and a .8h destination.  */
16647 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
16648 vsubhn_high_u32 (uint16x4_t a
, uint32x4_t b
, uint32x4_t c
)
16650 uint16x8_t result
= vcombine_u16 (a
, vcreate_u16 (UINT64_C (0x0)));
16651 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
16654 : /* No clobbers */);
/* vsubhn_high_u64: RESULT starts as vcombine_u32 (A, all-zero vector); the
   asm template is SUBHN2 with .2d sources and a .4s destination.  */
16658 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
16659 vsubhn_high_u64 (uint32x2_t a
, uint64x2_t b
, uint64x2_t c
)
16661 uint32x4_t result
= vcombine_u32 (a
, vcreate_u32 (UINT64_C (0x0)));
16662 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
16665 : /* No clobbers */);
/* vsubhn_s16: int16x8_t A, B -> int8x8_t through one SUBHN inline asm
   (.8h sources, .8b destination), no clobbers.  */
16669 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16670 vsubhn_s16 (int16x8_t a
, int16x8_t b
)
16673 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
16676 : /* No clobbers */);
/* vsubhn_s32: int32x4_t A, B -> int16x4_t through one SUBHN inline asm
   (.4s sources, .4h destination), no clobbers.  */
16680 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
16681 vsubhn_s32 (int32x4_t a
, int32x4_t b
)
16684 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
16687 : /* No clobbers */);
/* vsubhn_s64: int64x2_t A, B -> int32x2_t through one SUBHN inline asm
   (.2d sources, .2s destination), no clobbers.  */
16691 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
16692 vsubhn_s64 (int64x2_t a
, int64x2_t b
)
16695 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
16698 : /* No clobbers */);
/* vsubhn_u16: uint16x8_t A, B -> uint8x8_t through one SUBHN inline asm
   (.8h sources, .8b destination), no clobbers.  */
16702 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16703 vsubhn_u16 (uint16x8_t a
, uint16x8_t b
)
16706 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
16709 : /* No clobbers */);
/* vsubhn_u32: uint32x4_t A, B -> uint16x4_t through one SUBHN inline asm
   (.4s sources, .4h destination), no clobbers.  */
16713 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
16714 vsubhn_u32 (uint32x4_t a
, uint32x4_t b
)
16717 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
16720 : /* No clobbers */);
/* vsubhn_u64: uint64x2_t A, B -> uint32x2_t through one SUBHN inline asm
   (.2d sources, .2s destination), no clobbers.  */
16724 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
16725 vsubhn_u64 (uint64x2_t a
, uint64x2_t b
)
16728 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
16731 : /* No clobbers */);
/* vtrn1_f32: float32x2_t A, B -> float32x2_t through one TRN1 inline asm on
   the .2s arrangement, no clobbers.  */
16735 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
16736 vtrn1_f32 (float32x2_t a
, float32x2_t b
)
16738 float32x2_t result
;
16739 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16742 : /* No clobbers */);
/* vtrn1_p8: poly8x8_t A, B -> poly8x8_t through one TRN1 inline asm on the
   .8b arrangement, no clobbers.  */
16746 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16747 vtrn1_p8 (poly8x8_t a
, poly8x8_t b
)
16750 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16753 : /* No clobbers */);
/* vtrn1_p16: poly16x4_t A, B -> poly16x4_t through one TRN1 inline asm on
   the .4h arrangement, no clobbers.  */
16757 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
16758 vtrn1_p16 (poly16x4_t a
, poly16x4_t b
)
16761 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16764 : /* No clobbers */);
/* vtrn1_s8: int8x8_t A, B -> int8x8_t through one TRN1 inline asm on the
   .8b arrangement, no clobbers.  */
16768 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
16769 vtrn1_s8 (int8x8_t a
, int8x8_t b
)
16772 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16775 : /* No clobbers */);
/* vtrn1_s16: int16x4_t A, B -> int16x4_t through one TRN1 inline asm on the
   .4h arrangement, no clobbers.  */
16779 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
16780 vtrn1_s16 (int16x4_t a
, int16x4_t b
)
16783 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16786 : /* No clobbers */);
/* vtrn1_s32: int32x2_t A, B -> int32x2_t through one TRN1 inline asm on the
   .2s arrangement, no clobbers.  */
16790 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
16791 vtrn1_s32 (int32x2_t a
, int32x2_t b
)
16794 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16797 : /* No clobbers */);
/* vtrn1_u8: uint8x8_t A, B -> uint8x8_t through one TRN1 inline asm on the
   .8b arrangement, no clobbers.  */
16801 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
16802 vtrn1_u8 (uint8x8_t a
, uint8x8_t b
)
16805 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16808 : /* No clobbers */);
/* vtrn1_u16: uint16x4_t A, B -> uint16x4_t through one TRN1 inline asm on
   the .4h arrangement, no clobbers.  */
16812 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
16813 vtrn1_u16 (uint16x4_t a
, uint16x4_t b
)
16816 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16819 : /* No clobbers */);
/* vtrn1_u32: uint32x2_t A, B -> uint32x2_t through one TRN1 inline asm on
   the .2s arrangement, no clobbers.  */
16823 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
16824 vtrn1_u32 (uint32x2_t a
, uint32x2_t b
)
16827 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16830 : /* No clobbers */);
/* vtrn1q_f32: float32x4_t A, B -> float32x4_t through one TRN1 inline asm on
   the .4s arrangement, no clobbers.  */
16834 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
16835 vtrn1q_f32 (float32x4_t a
, float32x4_t b
)
16837 float32x4_t result
;
16838 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16841 : /* No clobbers */);
/* vtrn1q_f64: float64x2_t A, B -> float64x2_t through one TRN1 inline asm on
   the .2d arrangement, no clobbers.  */
16845 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
16846 vtrn1q_f64 (float64x2_t a
, float64x2_t b
)
16848 float64x2_t result
;
16849 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16852 : /* No clobbers */);
/* vtrn1q_p8: poly8x16_t A, B -> poly8x16_t through one TRN1 inline asm on
   the .16b arrangement, no clobbers.  */
16856 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
16857 vtrn1q_p8 (poly8x16_t a
, poly8x16_t b
)
16860 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16863 : /* No clobbers */);
/* vtrn1q_p16: poly16x8_t A, B -> poly16x8_t through one TRN1 inline asm on
   the .8h arrangement, no clobbers.  */
16867 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
16868 vtrn1q_p16 (poly16x8_t a
, poly16x8_t b
)
16871 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16874 : /* No clobbers */);
/* vtrn1q_s8: int8x16_t A, B -> int8x16_t through one TRN1 inline asm on the
   .16b arrangement, no clobbers.  */
16878 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
16879 vtrn1q_s8 (int8x16_t a
, int8x16_t b
)
16882 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16885 : /* No clobbers */);
/* vtrn1q_s16: int16x8_t A, B -> int16x8_t through one TRN1 inline asm on the
   .8h arrangement, no clobbers.  */
16889 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
16890 vtrn1q_s16 (int16x8_t a
, int16x8_t b
)
16893 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16896 : /* No clobbers */);
/* vtrn1q_s32: int32x4_t A, B -> int32x4_t through one TRN1 inline asm on the
   .4s arrangement, no clobbers.  */
16900 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
16901 vtrn1q_s32 (int32x4_t a
, int32x4_t b
)
16904 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16907 : /* No clobbers */);
/* vtrn1q_s64: int64x2_t A, B -> int64x2_t through one TRN1 inline asm on the
   .2d arrangement, no clobbers.  */
16911 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
16912 vtrn1q_s64 (int64x2_t a
, int64x2_t b
)
16915 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16918 : /* No clobbers */);
/* vtrn1q_u8: uint8x16_t A, B -> uint8x16_t through one TRN1 inline asm on
   the .16b arrangement, no clobbers.  */
16922 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
16923 vtrn1q_u8 (uint8x16_t a
, uint8x16_t b
)
16926 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16929 : /* No clobbers */);
/* vtrn1q_u16: uint16x8_t A, B -> uint16x8_t through one TRN1 inline asm on
   the .8h arrangement, no clobbers.  */
16933 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
16934 vtrn1q_u16 (uint16x8_t a
, uint16x8_t b
)
16937 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16940 : /* No clobbers */);
/* vtrn1q_u32: uint32x4_t A, B -> uint32x4_t through one TRN1 inline asm on
   the .4s arrangement, no clobbers.  */
16944 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
16945 vtrn1q_u32 (uint32x4_t a
, uint32x4_t b
)
16948 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16951 : /* No clobbers */);
/* vtrn1q_u64: uint64x2_t A, B -> uint64x2_t through one TRN1 inline asm on
   the .2d arrangement, no clobbers.  */
16955 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
16956 vtrn1q_u64 (uint64x2_t a
, uint64x2_t b
)
16959 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16962 : /* No clobbers */);
/* vtrn2_f32: float32x2_t A, B -> float32x2_t through one TRN2 inline asm on
   the .2s arrangement, no clobbers.  */
16966 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
16967 vtrn2_f32 (float32x2_t a
, float32x2_t b
)
16969 float32x2_t result
;
16970 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
16973 : /* No clobbers */);
/* vtrn2_p8: poly8x8_t A, B -> poly8x8_t through one TRN2 inline asm on the
   .8b arrangement, no clobbers.  */
16977 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
16978 vtrn2_p8 (poly8x8_t a
, poly8x8_t b
)
16981 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
16984 : /* No clobbers */);
/* vtrn2_p16: poly16x4_t A, B -> poly16x4_t through one TRN2 inline asm on
   the .4h arrangement, no clobbers.  */
16988 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
16989 vtrn2_p16 (poly16x4_t a
, poly16x4_t b
)
16992 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
16995 : /* No clobbers */);
/* vtrn2_s8: int8x8_t A, B -> int8x8_t through one TRN2 inline asm on the
   .8b arrangement, no clobbers.  */
16999 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17000 vtrn2_s8 (int8x8_t a
, int8x8_t b
)
17003 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17006 : /* No clobbers */);
/* vtrn2_s16: int16x4_t A, B -> int16x4_t through one TRN2 inline asm on the
   .4h arrangement, no clobbers.  */
17010 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17011 vtrn2_s16 (int16x4_t a
, int16x4_t b
)
17014 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17017 : /* No clobbers */);
/* vtrn2_s32: int32x2_t A, B -> int32x2_t through one TRN2 inline asm on the
   .2s arrangement, no clobbers.  */
17021 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17022 vtrn2_s32 (int32x2_t a
, int32x2_t b
)
17025 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17028 : /* No clobbers */);
/* vtrn2_u8: uint8x8_t A, B -> uint8x8_t through one TRN2 inline asm on the
   .8b arrangement, no clobbers.  */
17032 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17033 vtrn2_u8 (uint8x8_t a
, uint8x8_t b
)
17036 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17039 : /* No clobbers */);
/* vtrn2_u16: uint16x4_t A, B -> uint16x4_t through one TRN2 inline asm on
   the .4h arrangement, no clobbers.  */
17043 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17044 vtrn2_u16 (uint16x4_t a
, uint16x4_t b
)
17047 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17050 : /* No clobbers */);
/* vtrn2_u32: uint32x2_t A, B -> uint32x2_t through one TRN2 inline asm on
   the .2s arrangement, no clobbers.  */
17054 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17055 vtrn2_u32 (uint32x2_t a
, uint32x2_t b
)
17058 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17061 : /* No clobbers */);
/* vtrn2q_f32: float32x4_t A, B -> float32x4_t through one TRN2 inline asm on
   the .4s arrangement, no clobbers.  */
17065 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
17066 vtrn2q_f32 (float32x4_t a
, float32x4_t b
)
17068 float32x4_t result
;
17069 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17072 : /* No clobbers */);
/* vtrn2q_f64: float64x2_t A, B -> float64x2_t through one TRN2 inline asm on
   the .2d arrangement, no clobbers.  */
17076 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
17077 vtrn2q_f64 (float64x2_t a
, float64x2_t b
)
17079 float64x2_t result
;
17080 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17083 : /* No clobbers */);
/* vtrn2q_p8: poly8x16_t A, B -> poly8x16_t through one TRN2 inline asm on
   the .16b arrangement, no clobbers.  */
17087 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
17088 vtrn2q_p8 (poly8x16_t a
, poly8x16_t b
)
17091 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17094 : /* No clobbers */);
/* vtrn2q_p16: poly16x8_t A, B -> poly16x8_t through one TRN2 inline asm on
   the .8h arrangement, no clobbers.  */
17098 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
17099 vtrn2q_p16 (poly16x8_t a
, poly16x8_t b
)
17102 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17105 : /* No clobbers */);
/* vtrn2q_s8: int8x16_t A, B -> int8x16_t through one TRN2 inline asm on the
   .16b arrangement, no clobbers.  */
17109 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
17110 vtrn2q_s8 (int8x16_t a
, int8x16_t b
)
17113 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17116 : /* No clobbers */);
/* vtrn2q_s16: int16x8_t A, B -> int16x8_t through one TRN2 inline asm on the
   .8h arrangement, no clobbers.  */
17120 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
17121 vtrn2q_s16 (int16x8_t a
, int16x8_t b
)
17124 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17127 : /* No clobbers */);
/* vtrn2q_s32: int32x4_t A, B -> int32x4_t through one TRN2 inline asm on the
   .4s arrangement, no clobbers.  */
17131 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
17132 vtrn2q_s32 (int32x4_t a
, int32x4_t b
)
17135 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17138 : /* No clobbers */);
/* vtrn2q_s64: int64x2_t A, B -> int64x2_t through one TRN2 inline asm on the
   .2d arrangement, no clobbers.  */
17142 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
17143 vtrn2q_s64 (int64x2_t a
, int64x2_t b
)
17146 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17149 : /* No clobbers */);
/* vtrn2q_u8: uint8x16_t A, B -> uint8x16_t through one TRN2 inline asm on
   the .16b arrangement, no clobbers.  */
17153 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17154 vtrn2q_u8 (uint8x16_t a
, uint8x16_t b
)
17157 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17160 : /* No clobbers */);
/* vtrn2q_u16: uint16x8_t A, B -> uint16x8_t through one TRN2 inline asm on
   the .8h arrangement, no clobbers.  */
17164 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17165 vtrn2q_u16 (uint16x8_t a
, uint16x8_t b
)
17168 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17171 : /* No clobbers */);
/* vtrn2q_u32: uint32x4_t A, B -> uint32x4_t through one TRN2 inline asm on
   the .4s arrangement, no clobbers.  */
17175 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17176 vtrn2q_u32 (uint32x4_t a
, uint32x4_t b
)
17179 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17182 : /* No clobbers */);
/* vtrn2q_u64: uint64x2_t A, B -> uint64x2_t through one TRN2 inline asm on
   the .2d arrangement, no clobbers.  */
17186 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17187 vtrn2q_u64 (uint64x2_t a
, uint64x2_t b
)
17190 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17193 : /* No clobbers */);
/* vtst_p8: poly8x8_t A, B -> uint8x8_t through one CMTST inline asm on the
   .8b arrangement, no clobbers.  */
17197 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17198 vtst_p8 (poly8x8_t a
, poly8x8_t b
)
17201 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
17204 : /* No clobbers */);
/* vtst_p16: poly16x4_t A, B -> uint16x4_t through one CMTST inline asm on
   the .4h arrangement, no clobbers.  */
17208 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17209 vtst_p16 (poly16x4_t a
, poly16x4_t b
)
17212 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
17215 : /* No clobbers */);
/* vtstq_p8: poly8x16_t A, B -> uint8x16_t through one CMTST inline asm on
   the .16b arrangement, no clobbers.  */
17219 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17220 vtstq_p8 (poly8x16_t a
, poly8x16_t b
)
17223 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
17226 : /* No clobbers */);
/* vtstq_p16: poly16x8_t A, B -> uint16x8_t through one CMTST inline asm on
   the .8h arrangement, no clobbers.  */
17230 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17231 vtstq_p16 (poly16x8_t a
, poly16x8_t b
)
17234 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
17237 : /* No clobbers */);
/* vuzp1_f32: float32x2_t A, B -> float32x2_t through one UZP1 inline asm on
   the .2s arrangement, no clobbers.  */
17240 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
17241 vuzp1_f32 (float32x2_t a
, float32x2_t b
)
17243 float32x2_t result
;
17244 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17247 : /* No clobbers */);
/* vuzp1_p8: poly8x8_t A, B -> poly8x8_t through one UZP1 inline asm on the
   .8b arrangement, no clobbers.  */
17251 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17252 vuzp1_p8 (poly8x8_t a
, poly8x8_t b
)
17255 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17258 : /* No clobbers */);
/* vuzp1_p16: poly16x4_t A, B -> poly16x4_t through one UZP1 inline asm on
   the .4h arrangement, no clobbers.  */
17262 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
17263 vuzp1_p16 (poly16x4_t a
, poly16x4_t b
)
17266 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17269 : /* No clobbers */);
17273 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17274 vuzp1_s8 (int8x8_t a
, int8x8_t b
)
17277 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17280 : /* No clobbers */);
17284 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17285 vuzp1_s16 (int16x4_t a
, int16x4_t b
)
17288 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17291 : /* No clobbers */);
17295 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17296 vuzp1_s32 (int32x2_t a
, int32x2_t b
)
17299 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17302 : /* No clobbers */);
17306 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17307 vuzp1_u8 (uint8x8_t a
, uint8x8_t b
)
17310 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17313 : /* No clobbers */);
17317 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17318 vuzp1_u16 (uint16x4_t a
, uint16x4_t b
)
17321 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17324 : /* No clobbers */);
17328 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17329 vuzp1_u32 (uint32x2_t a
, uint32x2_t b
)
17332 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17335 : /* No clobbers */);
17339 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
17340 vuzp1q_f32 (float32x4_t a
, float32x4_t b
)
17342 float32x4_t result
;
17343 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17346 : /* No clobbers */);
17350 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
17351 vuzp1q_f64 (float64x2_t a
, float64x2_t b
)
17353 float64x2_t result
;
17354 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17357 : /* No clobbers */);
17361 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
17362 vuzp1q_p8 (poly8x16_t a
, poly8x16_t b
)
17365 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17368 : /* No clobbers */);
17372 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
17373 vuzp1q_p16 (poly16x8_t a
, poly16x8_t b
)
17376 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17379 : /* No clobbers */);
17383 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
17384 vuzp1q_s8 (int8x16_t a
, int8x16_t b
)
17387 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17390 : /* No clobbers */);
17394 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
17395 vuzp1q_s16 (int16x8_t a
, int16x8_t b
)
17398 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17401 : /* No clobbers */);
17405 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
17406 vuzp1q_s32 (int32x4_t a
, int32x4_t b
)
17409 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17412 : /* No clobbers */);
17416 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
17417 vuzp1q_s64 (int64x2_t a
, int64x2_t b
)
17420 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17423 : /* No clobbers */);
17427 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17428 vuzp1q_u8 (uint8x16_t a
, uint8x16_t b
)
17431 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17434 : /* No clobbers */);
17438 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17439 vuzp1q_u16 (uint16x8_t a
, uint16x8_t b
)
17442 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17445 : /* No clobbers */);
17449 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17450 vuzp1q_u32 (uint32x4_t a
, uint32x4_t b
)
17453 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17456 : /* No clobbers */);
17460 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17461 vuzp1q_u64 (uint64x2_t a
, uint64x2_t b
)
17464 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17467 : /* No clobbers */);
17471 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
17472 vuzp2_f32 (float32x2_t a
, float32x2_t b
)
17474 float32x2_t result
;
17475 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17478 : /* No clobbers */);
17482 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17483 vuzp2_p8 (poly8x8_t a
, poly8x8_t b
)
17486 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17489 : /* No clobbers */);
17493 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
17494 vuzp2_p16 (poly16x4_t a
, poly16x4_t b
)
17497 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17500 : /* No clobbers */);
17504 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17505 vuzp2_s8 (int8x8_t a
, int8x8_t b
)
17508 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17511 : /* No clobbers */);
17515 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17516 vuzp2_s16 (int16x4_t a
, int16x4_t b
)
17519 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17522 : /* No clobbers */);
17526 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17527 vuzp2_s32 (int32x2_t a
, int32x2_t b
)
17530 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17533 : /* No clobbers */);
17537 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17538 vuzp2_u8 (uint8x8_t a
, uint8x8_t b
)
17541 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17544 : /* No clobbers */);
17548 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17549 vuzp2_u16 (uint16x4_t a
, uint16x4_t b
)
17552 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17555 : /* No clobbers */);
17559 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17560 vuzp2_u32 (uint32x2_t a
, uint32x2_t b
)
17563 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17566 : /* No clobbers */);
17570 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
17571 vuzp2q_f32 (float32x4_t a
, float32x4_t b
)
17573 float32x4_t result
;
17574 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17577 : /* No clobbers */);
17581 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
17582 vuzp2q_f64 (float64x2_t a
, float64x2_t b
)
17584 float64x2_t result
;
17585 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17588 : /* No clobbers */);
17592 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
17593 vuzp2q_p8 (poly8x16_t a
, poly8x16_t b
)
17596 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17599 : /* No clobbers */);
17603 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
17604 vuzp2q_p16 (poly16x8_t a
, poly16x8_t b
)
17607 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17610 : /* No clobbers */);
17614 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
17615 vuzp2q_s8 (int8x16_t a
, int8x16_t b
)
17618 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17621 : /* No clobbers */);
17625 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
17626 vuzp2q_s16 (int16x8_t a
, int16x8_t b
)
17629 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17632 : /* No clobbers */);
17636 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
17637 vuzp2q_s32 (int32x4_t a
, int32x4_t b
)
17640 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17643 : /* No clobbers */);
17647 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
17648 vuzp2q_s64 (int64x2_t a
, int64x2_t b
)
17651 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17654 : /* No clobbers */);
17658 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17659 vuzp2q_u8 (uint8x16_t a
, uint8x16_t b
)
17662 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17665 : /* No clobbers */);
17669 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17670 vuzp2q_u16 (uint16x8_t a
, uint16x8_t b
)
17673 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17676 : /* No clobbers */);
17680 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17681 vuzp2q_u32 (uint32x4_t a
, uint32x4_t b
)
17684 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17687 : /* No clobbers */);
17691 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17692 vuzp2q_u64 (uint64x2_t a
, uint64x2_t b
)
17695 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17698 : /* No clobbers */);
17702 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
17703 vzip1_f32 (float32x2_t a
, float32x2_t b
)
17705 float32x2_t result
;
17706 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17709 : /* No clobbers */);
17713 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17714 vzip1_p8 (poly8x8_t a
, poly8x8_t b
)
17717 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17720 : /* No clobbers */);
17724 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
17725 vzip1_p16 (poly16x4_t a
, poly16x4_t b
)
17728 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17731 : /* No clobbers */);
17735 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17736 vzip1_s8 (int8x8_t a
, int8x8_t b
)
17739 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17742 : /* No clobbers */);
17746 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17747 vzip1_s16 (int16x4_t a
, int16x4_t b
)
17750 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17753 : /* No clobbers */);
17757 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17758 vzip1_s32 (int32x2_t a
, int32x2_t b
)
17761 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17764 : /* No clobbers */);
17768 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
17769 vzip1_u8 (uint8x8_t a
, uint8x8_t b
)
17772 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17775 : /* No clobbers */);
17779 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
17780 vzip1_u16 (uint16x4_t a
, uint16x4_t b
)
17783 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17786 : /* No clobbers */);
17790 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
17791 vzip1_u32 (uint32x2_t a
, uint32x2_t b
)
17794 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17797 : /* No clobbers */);
17801 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
17802 vzip1q_f32 (float32x4_t a
, float32x4_t b
)
17804 float32x4_t result
;
17805 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17808 : /* No clobbers */);
17812 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
17813 vzip1q_f64 (float64x2_t a
, float64x2_t b
)
17815 float64x2_t result
;
17816 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17819 : /* No clobbers */);
17823 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
17824 vzip1q_p8 (poly8x16_t a
, poly8x16_t b
)
17827 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17830 : /* No clobbers */);
17834 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
17835 vzip1q_p16 (poly16x8_t a
, poly16x8_t b
)
17838 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17841 : /* No clobbers */);
17845 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
17846 vzip1q_s8 (int8x16_t a
, int8x16_t b
)
17849 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17852 : /* No clobbers */);
17856 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
17857 vzip1q_s16 (int16x8_t a
, int16x8_t b
)
17860 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17863 : /* No clobbers */);
17867 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
17868 vzip1q_s32 (int32x4_t a
, int32x4_t b
)
17871 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17874 : /* No clobbers */);
17878 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
17879 vzip1q_s64 (int64x2_t a
, int64x2_t b
)
17882 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17885 : /* No clobbers */);
17889 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
17890 vzip1q_u8 (uint8x16_t a
, uint8x16_t b
)
17893 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17896 : /* No clobbers */);
17900 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
17901 vzip1q_u16 (uint16x8_t a
, uint16x8_t b
)
17904 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17907 : /* No clobbers */);
17911 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
17912 vzip1q_u32 (uint32x4_t a
, uint32x4_t b
)
17915 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17918 : /* No clobbers */);
17922 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
17923 vzip1q_u64 (uint64x2_t a
, uint64x2_t b
)
17926 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17929 : /* No clobbers */);
17933 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
17934 vzip2_f32 (float32x2_t a
, float32x2_t b
)
17936 float32x2_t result
;
17937 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
17940 : /* No clobbers */);
17944 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
17945 vzip2_p8 (poly8x8_t a
, poly8x8_t b
)
17948 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
17951 : /* No clobbers */);
17955 __extension__
static __inline poly16x4_t
__attribute__ ((__always_inline__
))
17956 vzip2_p16 (poly16x4_t a
, poly16x4_t b
)
17959 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
17962 : /* No clobbers */);
17966 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
17967 vzip2_s8 (int8x8_t a
, int8x8_t b
)
17970 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
17973 : /* No clobbers */);
17977 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
17978 vzip2_s16 (int16x4_t a
, int16x4_t b
)
17981 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
17984 : /* No clobbers */);
17988 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
17989 vzip2_s32 (int32x2_t a
, int32x2_t b
)
17992 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
17995 : /* No clobbers */);
17999 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18000 vzip2_u8 (uint8x8_t a
, uint8x8_t b
)
18003 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
18006 : /* No clobbers */);
18010 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
18011 vzip2_u16 (uint16x4_t a
, uint16x4_t b
)
18014 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
18017 : /* No clobbers */);
18021 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
18022 vzip2_u32 (uint32x2_t a
, uint32x2_t b
)
18025 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
18028 : /* No clobbers */);
18032 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
18033 vzip2q_f32 (float32x4_t a
, float32x4_t b
)
18035 float32x4_t result
;
18036 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18039 : /* No clobbers */);
18043 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
18044 vzip2q_f64 (float64x2_t a
, float64x2_t b
)
18046 float64x2_t result
;
18047 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18050 : /* No clobbers */);
18054 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
18055 vzip2q_p8 (poly8x16_t a
, poly8x16_t b
)
18058 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18061 : /* No clobbers */);
18065 __extension__
static __inline poly16x8_t
__attribute__ ((__always_inline__
))
18066 vzip2q_p16 (poly16x8_t a
, poly16x8_t b
)
18069 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18072 : /* No clobbers */);
18076 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
18077 vzip2q_s8 (int8x16_t a
, int8x16_t b
)
18080 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18083 : /* No clobbers */);
18087 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
18088 vzip2q_s16 (int16x8_t a
, int16x8_t b
)
18091 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18094 : /* No clobbers */);
18098 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
18099 vzip2q_s32 (int32x4_t a
, int32x4_t b
)
18102 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18105 : /* No clobbers */);
18109 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
18110 vzip2q_s64 (int64x2_t a
, int64x2_t b
)
18113 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18116 : /* No clobbers */);
18120 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18121 vzip2q_u8 (uint8x16_t a
, uint8x16_t b
)
18124 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18127 : /* No clobbers */);
18131 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
18132 vzip2q_u16 (uint16x8_t a
, uint16x8_t b
)
18135 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18138 : /* No clobbers */);
18142 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
18143 vzip2q_u32 (uint32x4_t a
, uint32x4_t b
)
18146 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18149 : /* No clobbers */);
18153 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
18154 vzip2q_u64 (uint64x2_t a
, uint64x2_t b
)
18157 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18160 : /* No clobbers */);
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */
/* Create struct element types for duplicating loads.

   In the tables below, Y marks a struct type that is defined here with
   __STRUCTN, N marks a shape for which an already existing vector type
   of the same layout (e.g. int32x2_t) is used instead, and - marks a
   combination that is not used.

   Create 2-element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3-element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4-element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   These types are required for casting the memory references used as
   asm operands.  */
/* Define <elt><bits>x<count>_t as a struct wrapping an array VAL of
   COUNT elements of type <elt><bits>_t.  */
#define __STRUCTN(elt, bits, count)			\
  typedef struct elt ## bits ## x ## count ## _t	\
  {							\
    elt ## bits ## _t val[count];			\
  } elt ## bits ## x ## count ## _t;
18218 /* 2-element structs. */
18219 __STRUCTN (int, 8, 2)
18220 __STRUCTN (int, 16, 2)
18221 __STRUCTN (uint
, 8, 2)
18222 __STRUCTN (uint
, 16, 2)
18223 __STRUCTN (poly
, 8, 2)
18224 __STRUCTN (poly
, 16, 2)
18225 /* 3-element structs. */
18226 __STRUCTN (int, 8, 3)
18227 __STRUCTN (int, 16, 3)
18228 __STRUCTN (int, 32, 3)
18229 __STRUCTN (int, 64, 3)
18230 __STRUCTN (uint
, 8, 3)
18231 __STRUCTN (uint
, 16, 3)
18232 __STRUCTN (uint
, 32, 3)
18233 __STRUCTN (uint
, 64, 3)
18234 __STRUCTN (float, 32, 3)
18235 __STRUCTN (float, 64, 3)
18236 __STRUCTN (poly
, 8, 3)
18237 __STRUCTN (poly
, 16, 3)
18238 /* 4-element structs. */
18239 __STRUCTN (int, 8, 4)
18240 __STRUCTN (int, 64, 4)
18241 __STRUCTN (uint
, 8, 4)
18242 __STRUCTN (uint
, 64, 4)
18243 __STRUCTN (poly
, 8, 4)
18244 __STRUCTN (float, 64, 4)
/* Define vld2<Q>_dup_<funcsuffix> (const PTRTYPE *ptr), returning RETTYPE.
   LD2R loads one 2-element structure from PTR and replicates each element
   across all lanes of v16/v17 (arrangement REGSUFFIX); ST1 then writes
   both registers to the in-memory RESULT.  STRUCTTYPE only gives the
   memory input operand its type.  v16 and v17 are used by name, so they
   are listed as clobbers.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
18262 __LD2R_FUNC (float32x2x2_t
, float32x2_t
, float32_t
, 2s
, f32
,)
18263 __LD2R_FUNC (float64x1x2_t
, float64x2_t
, float64_t
, 1d
, f64
,)
18264 __LD2R_FUNC (poly8x8x2_t
, poly8x2_t
, poly8_t
, 8b
, p8
,)
18265 __LD2R_FUNC (poly16x4x2_t
, poly16x2_t
, poly16_t
, 4h
, p16
,)
18266 __LD2R_FUNC (int8x8x2_t
, int8x2_t
, int8_t, 8b
, s8
,)
18267 __LD2R_FUNC (int16x4x2_t
, int16x2_t
, int16_t, 4h
, s16
,)
18268 __LD2R_FUNC (int32x2x2_t
, int32x2_t
, int32_t, 2s
, s32
,)
18269 __LD2R_FUNC (int64x1x2_t
, int64x2_t
, int64_t, 1d
, s64
,)
18270 __LD2R_FUNC (uint8x8x2_t
, uint8x2_t
, uint8_t, 8b
, u8
,)
18271 __LD2R_FUNC (uint16x4x2_t
, uint16x2_t
, uint16_t, 4h
, u16
,)
18272 __LD2R_FUNC (uint32x2x2_t
, uint32x2_t
, uint32_t, 2s
, u32
,)
18273 __LD2R_FUNC (uint64x1x2_t
, uint64x2_t
, uint64_t, 1d
, u64
,)
18274 __LD2R_FUNC (float32x4x2_t
, float32x2_t
, float32_t
, 4s
, f32
, q
)
18275 __LD2R_FUNC (float64x2x2_t
, float64x2_t
, float64_t
, 2d
, f64
, q
)
18276 __LD2R_FUNC (poly8x16x2_t
, poly8x2_t
, poly8_t
, 16b
, p8
, q
)
18277 __LD2R_FUNC (poly16x8x2_t
, poly16x2_t
, poly16_t
, 8h
, p16
, q
)
18278 __LD2R_FUNC (int8x16x2_t
, int8x2_t
, int8_t, 16b
, s8
, q
)
18279 __LD2R_FUNC (int16x8x2_t
, int16x2_t
, int16_t, 8h
, s16
, q
)
18280 __LD2R_FUNC (int32x4x2_t
, int32x2_t
, int32_t, 4s
, s32
, q
)
18281 __LD2R_FUNC (int64x2x2_t
, int64x2_t
, int64_t, 2d
, s64
, q
)
18282 __LD2R_FUNC (uint8x16x2_t
, uint8x2_t
, uint8_t, 16b
, u8
, q
)
18283 __LD2R_FUNC (uint16x8x2_t
, uint16x2_t
, uint16_t, 8h
, u16
, q
)
18284 __LD2R_FUNC (uint32x4x2_t
, uint32x2_t
, uint32_t, 4s
, u32
, q
)
18285 __LD2R_FUNC (uint64x2x2_t
, uint64x2_t
, uint64_t, 2d
, u64
, q
)
/* Define vld2<Q>_lane_<funcsuffix> (const PTRTYPE *ptr, RETTYPE b,
   const int c), returning RETTYPE.  LD1 first loads B into v16/v17
   (arrangement REGSUFFIX), LD2 then overwrites lane C (element size
   LNSUFFIX) of both registers from PTR, and ST1 writes the pair to the
   in-memory RESULT.  C is an "i" operand, so it must be a constant.
   NOTE(review): the memory input is typed as a whole RETTYPE at PTR,
   which looks larger than the two elements LD2 reads -- confirm.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
18304 __LD2_LANE_FUNC (int8x8x2_t
, uint8_t, 8b
, b
, s8
,)
18305 __LD2_LANE_FUNC (float32x2x2_t
, float32_t
, 2s
, s
, f32
,)
18306 __LD2_LANE_FUNC (float64x1x2_t
, float64_t
, 1d
, d
, f64
,)
18307 __LD2_LANE_FUNC (poly8x8x2_t
, poly8_t
, 8b
, b
, p8
,)
18308 __LD2_LANE_FUNC (poly16x4x2_t
, poly16_t
, 4h
, h
, p16
,)
18309 __LD2_LANE_FUNC (int16x4x2_t
, int16_t, 4h
, h
, s16
,)
18310 __LD2_LANE_FUNC (int32x2x2_t
, int32_t, 2s
, s
, s32
,)
18311 __LD2_LANE_FUNC (int64x1x2_t
, int64_t, 1d
, d
, s64
,)
18312 __LD2_LANE_FUNC (uint8x8x2_t
, uint8_t, 8b
, b
, u8
,)
18313 __LD2_LANE_FUNC (uint16x4x2_t
, uint16_t, 4h
, h
, u16
,)
18314 __LD2_LANE_FUNC (uint32x2x2_t
, uint32_t, 2s
, s
, u32
,)
18315 __LD2_LANE_FUNC (uint64x1x2_t
, uint64_t, 1d
, d
, u64
,)
18316 __LD2_LANE_FUNC (float32x4x2_t
, float32_t
, 4s
, s
, f32
, q
)
18317 __LD2_LANE_FUNC (float64x2x2_t
, float64_t
, 2d
, d
, f64
, q
)
18318 __LD2_LANE_FUNC (poly8x16x2_t
, poly8_t
, 16b
, b
, p8
, q
)
18319 __LD2_LANE_FUNC (poly16x8x2_t
, poly16_t
, 8h
, h
, p16
, q
)
18320 __LD2_LANE_FUNC (int8x16x2_t
, int8_t, 16b
, b
, s8
, q
)
18321 __LD2_LANE_FUNC (int16x8x2_t
, int16_t, 8h
, h
, s16
, q
)
18322 __LD2_LANE_FUNC (int32x4x2_t
, int32_t, 4s
, s
, s32
, q
)
18323 __LD2_LANE_FUNC (int64x2x2_t
, int64_t, 2d
, d
, s64
, q
)
18324 __LD2_LANE_FUNC (uint8x16x2_t
, uint8_t, 16b
, b
, u8
, q
)
18325 __LD2_LANE_FUNC (uint16x8x2_t
, uint16_t, 8h
, h
, u16
, q
)
18326 __LD2_LANE_FUNC (uint32x4x2_t
, uint32_t, 4s
, s
, u32
, q
)
18327 __LD2_LANE_FUNC (uint64x2x2_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vld3<Q>_dup_<funcsuffix> (const PTRTYPE *ptr), returning RETTYPE.
   LD3R loads one 3-element structure from PTR and replicates each element
   across all lanes of v16-v18 (arrangement REGSUFFIX); ST1 then writes the
   three registers to the in-memory RESULT.  STRUCTTYPE only gives the
   memory input operand its type.  v16-v18 are used by name, so they are
   listed as clobbers.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
18344 __LD3R_FUNC (float32x2x3_t
, float32x3_t
, float32_t
, 2s
, f32
,)
18345 __LD3R_FUNC (float64x1x3_t
, float64x3_t
, float64_t
, 1d
, f64
,)
18346 __LD3R_FUNC (poly8x8x3_t
, poly8x3_t
, poly8_t
, 8b
, p8
,)
18347 __LD3R_FUNC (poly16x4x3_t
, poly16x3_t
, poly16_t
, 4h
, p16
,)
18348 __LD3R_FUNC (int8x8x3_t
, int8x3_t
, int8_t, 8b
, s8
,)
18349 __LD3R_FUNC (int16x4x3_t
, int16x3_t
, int16_t, 4h
, s16
,)
18350 __LD3R_FUNC (int32x2x3_t
, int32x3_t
, int32_t, 2s
, s32
,)
18351 __LD3R_FUNC (int64x1x3_t
, int64x3_t
, int64_t, 1d
, s64
,)
18352 __LD3R_FUNC (uint8x8x3_t
, uint8x3_t
, uint8_t, 8b
, u8
,)
18353 __LD3R_FUNC (uint16x4x3_t
, uint16x3_t
, uint16_t, 4h
, u16
,)
18354 __LD3R_FUNC (uint32x2x3_t
, uint32x3_t
, uint32_t, 2s
, u32
,)
18355 __LD3R_FUNC (uint64x1x3_t
, uint64x3_t
, uint64_t, 1d
, u64
,)
18356 __LD3R_FUNC (float32x4x3_t
, float32x3_t
, float32_t
, 4s
, f32
, q
)
18357 __LD3R_FUNC (float64x2x3_t
, float64x3_t
, float64_t
, 2d
, f64
, q
)
18358 __LD3R_FUNC (poly8x16x3_t
, poly8x3_t
, poly8_t
, 16b
, p8
, q
)
18359 __LD3R_FUNC (poly16x8x3_t
, poly16x3_t
, poly16_t
, 8h
, p16
, q
)
18360 __LD3R_FUNC (int8x16x3_t
, int8x3_t
, int8_t, 16b
, s8
, q
)
18361 __LD3R_FUNC (int16x8x3_t
, int16x3_t
, int16_t, 8h
, s16
, q
)
18362 __LD3R_FUNC (int32x4x3_t
, int32x3_t
, int32_t, 4s
, s32
, q
)
18363 __LD3R_FUNC (int64x2x3_t
, int64x3_t
, int64_t, 2d
, s64
, q
)
18364 __LD3R_FUNC (uint8x16x3_t
, uint8x3_t
, uint8_t, 16b
, u8
, q
)
18365 __LD3R_FUNC (uint16x8x3_t
, uint16x3_t
, uint16_t, 8h
, u16
, q
)
18366 __LD3R_FUNC (uint32x4x3_t
, uint32x3_t
, uint32_t, 4s
, u32
, q
)
18367 __LD3R_FUNC (uint64x2x3_t
, uint64x3_t
, uint64_t, 2d
, u64
, q
)
/* Define vld3<Q>_lane_<funcsuffix> (const PTRTYPE *ptr, RETTYPE b,
   const int c), returning RETTYPE.  LD1 first loads B into v16-v18
   (arrangement REGSUFFIX), LD3 then overwrites lane C (element size
   LNSUFFIX) of the three registers from PTR, and ST1 writes them to the
   in-memory RESULT.  C is an "i" operand, so it must be a constant.
   NOTE(review): the memory input is typed as a whole RETTYPE at PTR,
   which looks larger than the three elements LD3 reads -- confirm.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
18386 __LD3_LANE_FUNC (int8x8x3_t
, uint8_t, 8b
, b
, s8
,)
18387 __LD3_LANE_FUNC (float32x2x3_t
, float32_t
, 2s
, s
, f32
,)
18388 __LD3_LANE_FUNC (float64x1x3_t
, float64_t
, 1d
, d
, f64
,)
18389 __LD3_LANE_FUNC (poly8x8x3_t
, poly8_t
, 8b
, b
, p8
,)
18390 __LD3_LANE_FUNC (poly16x4x3_t
, poly16_t
, 4h
, h
, p16
,)
18391 __LD3_LANE_FUNC (int16x4x3_t
, int16_t, 4h
, h
, s16
,)
18392 __LD3_LANE_FUNC (int32x2x3_t
, int32_t, 2s
, s
, s32
,)
18393 __LD3_LANE_FUNC (int64x1x3_t
, int64_t, 1d
, d
, s64
,)
18394 __LD3_LANE_FUNC (uint8x8x3_t
, uint8_t, 8b
, b
, u8
,)
18395 __LD3_LANE_FUNC (uint16x4x3_t
, uint16_t, 4h
, h
, u16
,)
18396 __LD3_LANE_FUNC (uint32x2x3_t
, uint32_t, 2s
, s
, u32
,)
18397 __LD3_LANE_FUNC (uint64x1x3_t
, uint64_t, 1d
, d
, u64
,)
18398 __LD3_LANE_FUNC (float32x4x3_t
, float32_t
, 4s
, s
, f32
, q
)
18399 __LD3_LANE_FUNC (float64x2x3_t
, float64_t
, 2d
, d
, f64
, q
)
18400 __LD3_LANE_FUNC (poly8x16x3_t
, poly8_t
, 16b
, b
, p8
, q
)
18401 __LD3_LANE_FUNC (poly16x8x3_t
, poly16_t
, 8h
, h
, p16
, q
)
18402 __LD3_LANE_FUNC (int8x16x3_t
, int8_t, 16b
, b
, s8
, q
)
18403 __LD3_LANE_FUNC (int16x8x3_t
, int16_t, 8h
, h
, s16
, q
)
18404 __LD3_LANE_FUNC (int32x4x3_t
, int32_t, 4s
, s
, s32
, q
)
18405 __LD3_LANE_FUNC (int64x2x3_t
, int64_t, 2d
, d
, s64
, q
)
18406 __LD3_LANE_FUNC (uint8x16x3_t
, uint8_t, 16b
, b
, u8
, q
)
18407 __LD3_LANE_FUNC (uint16x8x3_t
, uint16_t, 8h
, h
, u16
, q
)
18408 __LD3_LANE_FUNC (uint32x4x3_t
, uint32_t, 4s
, s
, u32
, q
)
18409 __LD3_LANE_FUNC (uint64x2x3_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vld4<Q>_dup_<funcsuffix> (const PTRTYPE *ptr), returning RETTYPE.
   LD4R loads one 4-element structure from PTR and replicates each element
   across all lanes of v16-v19 (arrangement REGSUFFIX); ST1 then writes the
   four registers to the in-memory RESULT.  STRUCTTYPE only gives the
   memory input operand its type.  v16-v19 are used by name, so they are
   listed as clobbers.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
18426 __LD4R_FUNC (float32x2x4_t
, float32x4_t
, float32_t
, 2s
, f32
,)
18427 __LD4R_FUNC (float64x1x4_t
, float64x4_t
, float64_t
, 1d
, f64
,)
18428 __LD4R_FUNC (poly8x8x4_t
, poly8x4_t
, poly8_t
, 8b
, p8
,)
18429 __LD4R_FUNC (poly16x4x4_t
, poly16x4_t
, poly16_t
, 4h
, p16
,)
18430 __LD4R_FUNC (int8x8x4_t
, int8x4_t
, int8_t, 8b
, s8
,)
18431 __LD4R_FUNC (int16x4x4_t
, int16x4_t
, int16_t, 4h
, s16
,)
18432 __LD4R_FUNC (int32x2x4_t
, int32x4_t
, int32_t, 2s
, s32
,)
18433 __LD4R_FUNC (int64x1x4_t
, int64x4_t
, int64_t, 1d
, s64
,)
18434 __LD4R_FUNC (uint8x8x4_t
, uint8x4_t
, uint8_t, 8b
, u8
,)
18435 __LD4R_FUNC (uint16x4x4_t
, uint16x4_t
, uint16_t, 4h
, u16
,)
18436 __LD4R_FUNC (uint32x2x4_t
, uint32x4_t
, uint32_t, 2s
, u32
,)
18437 __LD4R_FUNC (uint64x1x4_t
, uint64x4_t
, uint64_t, 1d
, u64
,)
18438 __LD4R_FUNC (float32x4x4_t
, float32x4_t
, float32_t
, 4s
, f32
, q
)
18439 __LD4R_FUNC (float64x2x4_t
, float64x4_t
, float64_t
, 2d
, f64
, q
)
18440 __LD4R_FUNC (poly8x16x4_t
, poly8x4_t
, poly8_t
, 16b
, p8
, q
)
18441 __LD4R_FUNC (poly16x8x4_t
, poly16x4_t
, poly16_t
, 8h
, p16
, q
)
18442 __LD4R_FUNC (int8x16x4_t
, int8x4_t
, int8_t, 16b
, s8
, q
)
18443 __LD4R_FUNC (int16x8x4_t
, int16x4_t
, int16_t, 8h
, s16
, q
)
18444 __LD4R_FUNC (int32x4x4_t
, int32x4_t
, int32_t, 4s
, s32
, q
)
18445 __LD4R_FUNC (int64x2x4_t
, int64x4_t
, int64_t, 2d
, s64
, q
)
18446 __LD4R_FUNC (uint8x16x4_t
, uint8x4_t
, uint8_t, 16b
, u8
, q
)
18447 __LD4R_FUNC (uint16x8x4_t
, uint16x4_t
, uint16_t, 8h
, u16
, q
)
18448 __LD4R_FUNC (uint32x4x4_t
, uint32x4_t
, uint32_t, 4s
, u32
, q
)
18449 __LD4R_FUNC (uint64x2x4_t
, uint64x4_t
, uint64_t, 2d
, u64
, q
)
/* Define vld4<Q>_lane_<funcsuffix> (const PTRTYPE *ptr, RETTYPE b,
   const int c), returning RETTYPE.  LD1 first loads B into v16-v19
   (arrangement REGSUFFIX), LD4 then overwrites lane C (element size
   LNSUFFIX) of the four registers from PTR, and ST1 writes them to the
   in-memory RESULT.  C is an "i" operand, so it must be a constant.
   NOTE(review): the memory input is typed as a whole RETTYPE at PTR,
   which looks larger than the four elements LD4 reads -- confirm.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
18468 __LD4_LANE_FUNC (int8x8x4_t
, uint8_t, 8b
, b
, s8
,)
18469 __LD4_LANE_FUNC (float32x2x4_t
, float32_t
, 2s
, s
, f32
,)
18470 __LD4_LANE_FUNC (float64x1x4_t
, float64_t
, 1d
, d
, f64
,)
18471 __LD4_LANE_FUNC (poly8x8x4_t
, poly8_t
, 8b
, b
, p8
,)
18472 __LD4_LANE_FUNC (poly16x4x4_t
, poly16_t
, 4h
, h
, p16
,)
18473 __LD4_LANE_FUNC (int16x4x4_t
, int16_t, 4h
, h
, s16
,)
18474 __LD4_LANE_FUNC (int32x2x4_t
, int32_t, 2s
, s
, s32
,)
18475 __LD4_LANE_FUNC (int64x1x4_t
, int64_t, 1d
, d
, s64
,)
18476 __LD4_LANE_FUNC (uint8x8x4_t
, uint8_t, 8b
, b
, u8
,)
18477 __LD4_LANE_FUNC (uint16x4x4_t
, uint16_t, 4h
, h
, u16
,)
18478 __LD4_LANE_FUNC (uint32x2x4_t
, uint32_t, 2s
, s
, u32
,)
18479 __LD4_LANE_FUNC (uint64x1x4_t
, uint64_t, 1d
, d
, u64
,)
18480 __LD4_LANE_FUNC (float32x4x4_t
, float32_t
, 4s
, s
, f32
, q
)
18481 __LD4_LANE_FUNC (float64x2x4_t
, float64_t
, 2d
, d
, f64
, q
)
18482 __LD4_LANE_FUNC (poly8x16x4_t
, poly8_t
, 16b
, b
, p8
, q
)
18483 __LD4_LANE_FUNC (poly16x8x4_t
, poly16_t
, 8h
, h
, p16
, q
)
18484 __LD4_LANE_FUNC (int8x16x4_t
, int8_t, 16b
, b
, s8
, q
)
18485 __LD4_LANE_FUNC (int16x8x4_t
, int16_t, 8h
, h
, s16
, q
)
18486 __LD4_LANE_FUNC (int32x4x4_t
, int32_t, 4s
, s
, s32
, q
)
18487 __LD4_LANE_FUNC (int64x2x4_t
, int64_t, 2d
, d
, s64
, q
)
18488 __LD4_LANE_FUNC (uint8x16x4_t
, uint8_t, 16b
, b
, u8
, q
)
18489 __LD4_LANE_FUNC (uint16x8x4_t
, uint16_t, 8h
, h
, u16
, q
)
18490 __LD4_LANE_FUNC (uint32x4x4_t
, uint32_t, 4s
, s
, u32
, q
)
18491 __LD4_LANE_FUNC (uint64x2x4_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst2<Q>_lane_<funcsuffix> (const PTRTYPE *ptr, INTYPE b,
   const int c).  LD1 loads B into v16/v17 (arrangement REGSUFFIX), then
   ST2 stores lane C (element size LNSUFFIX) of both registers to PTR.
   C is an "i" operand, so it must be a constant.
   NOTE(review): PTR is declared pointer-to-const although the function
   stores through it (the cast below drops the qualifier); the signature
   is kept as-is so existing callers are unaffected.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     intype b, const int c)		\
  {									\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*(intype *) ptr)					\
	     : "Q"(b), "i"(c)						\
	     : "memory", "v16", "v17");					\
  }
18507 __ST2_LANE_FUNC (int8x8x2_t
, int8_t, 8b
, b
, s8
,)
18508 __ST2_LANE_FUNC (float32x2x2_t
, float32_t
, 2s
, s
, f32
,)
18509 __ST2_LANE_FUNC (float64x1x2_t
, float64_t
, 1d
, d
, f64
,)
18510 __ST2_LANE_FUNC (poly8x8x2_t
, poly8_t
, 8b
, b
, p8
,)
18511 __ST2_LANE_FUNC (poly16x4x2_t
, poly16_t
, 4h
, h
, p16
,)
18512 __ST2_LANE_FUNC (int16x4x2_t
, int16_t, 4h
, h
, s16
,)
18513 __ST2_LANE_FUNC (int32x2x2_t
, int32_t, 2s
, s
, s32
,)
18514 __ST2_LANE_FUNC (int64x1x2_t
, int64_t, 1d
, d
, s64
,)
18515 __ST2_LANE_FUNC (uint8x8x2_t
, uint8_t, 8b
, b
, u8
,)
18516 __ST2_LANE_FUNC (uint16x4x2_t
, uint16_t, 4h
, h
, u16
,)
18517 __ST2_LANE_FUNC (uint32x2x2_t
, uint32_t, 2s
, s
, u32
,)
18518 __ST2_LANE_FUNC (uint64x1x2_t
, uint64_t, 1d
, d
, u64
,)
18519 __ST2_LANE_FUNC (float32x4x2_t
, float32_t
, 4s
, s
, f32
, q
)
18520 __ST2_LANE_FUNC (float64x2x2_t
, float64_t
, 2d
, d
, f64
, q
)
18521 __ST2_LANE_FUNC (poly8x16x2_t
, poly8_t
, 16b
, b
, p8
, q
)
18522 __ST2_LANE_FUNC (poly16x8x2_t
, poly16_t
, 8h
, h
, p16
, q
)
18523 __ST2_LANE_FUNC (int8x16x2_t
, int8_t, 16b
, b
, s8
, q
)
18524 __ST2_LANE_FUNC (int16x8x2_t
, int16_t, 8h
, h
, s16
, q
)
18525 __ST2_LANE_FUNC (int32x4x2_t
, int32_t, 4s
, s
, s32
, q
)
18526 __ST2_LANE_FUNC (int64x2x2_t
, int64_t, 2d
, d
, s64
, q
)
18527 __ST2_LANE_FUNC (uint8x16x2_t
, uint8_t, 16b
, b
, u8
, q
)
18528 __ST2_LANE_FUNC (uint16x8x2_t
, uint16_t, 8h
, h
, u16
, q
)
18529 __ST2_LANE_FUNC (uint32x4x2_t
, uint32_t, 4s
, s
, u32
, q
)
18530 __ST2_LANE_FUNC (uint64x2x2_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst3<Q>_lane_<funcsuffix> (PTR, B, C).  B is read from memory
   into v16-v18 with LD1, then ST3 writes lane C of the three registers
   to PTR.  C is an "i" operand, so it must be a compile-time constant.
   PTR is const-qualified in the prototype; the cast in the output
   operand drops the qualifier so the store can be expressed.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,                     \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline void                                    \
  __attribute__ ((__always_inline__))                                   \
  vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     intype b, const int c)             \
  {                                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"    \
             "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"  \
             : "=Q"(*(intype *) ptr)                                    \
             : "Q"(b), "i"(c)                                           \
             : "memory", "v16", "v17", "v18");                          \
  }

18546 __ST3_LANE_FUNC (int8x8x3_t
, int8_t, 8b
, b
, s8
,)
18547 __ST3_LANE_FUNC (float32x2x3_t
, float32_t
, 2s
, s
, f32
,)
18548 __ST3_LANE_FUNC (float64x1x3_t
, float64_t
, 1d
, d
, f64
,)
18549 __ST3_LANE_FUNC (poly8x8x3_t
, poly8_t
, 8b
, b
, p8
,)
18550 __ST3_LANE_FUNC (poly16x4x3_t
, poly16_t
, 4h
, h
, p16
,)
18551 __ST3_LANE_FUNC (int16x4x3_t
, int16_t, 4h
, h
, s16
,)
18552 __ST3_LANE_FUNC (int32x2x3_t
, int32_t, 2s
, s
, s32
,)
18553 __ST3_LANE_FUNC (int64x1x3_t
, int64_t, 1d
, d
, s64
,)
18554 __ST3_LANE_FUNC (uint8x8x3_t
, uint8_t, 8b
, b
, u8
,)
18555 __ST3_LANE_FUNC (uint16x4x3_t
, uint16_t, 4h
, h
, u16
,)
18556 __ST3_LANE_FUNC (uint32x2x3_t
, uint32_t, 2s
, s
, u32
,)
18557 __ST3_LANE_FUNC (uint64x1x3_t
, uint64_t, 1d
, d
, u64
,)
18558 __ST3_LANE_FUNC (float32x4x3_t
, float32_t
, 4s
, s
, f32
, q
)
18559 __ST3_LANE_FUNC (float64x2x3_t
, float64_t
, 2d
, d
, f64
, q
)
18560 __ST3_LANE_FUNC (poly8x16x3_t
, poly8_t
, 16b
, b
, p8
, q
)
18561 __ST3_LANE_FUNC (poly16x8x3_t
, poly16_t
, 8h
, h
, p16
, q
)
18562 __ST3_LANE_FUNC (int8x16x3_t
, int8_t, 16b
, b
, s8
, q
)
18563 __ST3_LANE_FUNC (int16x8x3_t
, int16_t, 8h
, h
, s16
, q
)
18564 __ST3_LANE_FUNC (int32x4x3_t
, int32_t, 4s
, s
, s32
, q
)
18565 __ST3_LANE_FUNC (int64x2x3_t
, int64_t, 2d
, d
, s64
, q
)
18566 __ST3_LANE_FUNC (uint8x16x3_t
, uint8_t, 16b
, b
, u8
, q
)
18567 __ST3_LANE_FUNC (uint16x8x3_t
, uint16_t, 8h
, h
, u16
, q
)
18568 __ST3_LANE_FUNC (uint32x4x3_t
, uint32_t, 4s
, s
, u32
, q
)
18569 __ST3_LANE_FUNC (uint64x2x3_t
, uint64_t, 2d
, d
, u64
, q
)
/* Define vst4<Q>_lane_<funcsuffix> (PTR, B, C).  B is read from memory
   into v16-v19 with LD1, then ST4 writes lane C of the four registers
   to PTR.  C is an "i" operand, so it must be a compile-time constant.
   PTR is const-qualified in the prototype; the cast in the output
   operand drops the qualifier so the store can be expressed.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,                     \
                        lnsuffix, funcsuffix, Q)                        \
  __extension__ static __inline void                                    \
  __attribute__ ((__always_inline__))                                   \
  vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                \
                                     intype b, const int c)             \
  {                                                                     \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"    \
             "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"  \
             : "=Q"(*(intype *) ptr)                                    \
             : "Q"(b), "i"(c)                                           \
             : "memory", "v16", "v17", "v18", "v19");                   \
  }

18585 __ST4_LANE_FUNC (int8x8x4_t
, int8_t, 8b
, b
, s8
,)
18586 __ST4_LANE_FUNC (float32x2x4_t
, float32_t
, 2s
, s
, f32
,)
18587 __ST4_LANE_FUNC (float64x1x4_t
, float64_t
, 1d
, d
, f64
,)
18588 __ST4_LANE_FUNC (poly8x8x4_t
, poly8_t
, 8b
, b
, p8
,)
18589 __ST4_LANE_FUNC (poly16x4x4_t
, poly16_t
, 4h
, h
, p16
,)
18590 __ST4_LANE_FUNC (int16x4x4_t
, int16_t, 4h
, h
, s16
,)
18591 __ST4_LANE_FUNC (int32x2x4_t
, int32_t, 2s
, s
, s32
,)
18592 __ST4_LANE_FUNC (int64x1x4_t
, int64_t, 1d
, d
, s64
,)
18593 __ST4_LANE_FUNC (uint8x8x4_t
, uint8_t, 8b
, b
, u8
,)
18594 __ST4_LANE_FUNC (uint16x4x4_t
, uint16_t, 4h
, h
, u16
,)
18595 __ST4_LANE_FUNC (uint32x2x4_t
, uint32_t, 2s
, s
, u32
,)
18596 __ST4_LANE_FUNC (uint64x1x4_t
, uint64_t, 1d
, d
, u64
,)
18597 __ST4_LANE_FUNC (float32x4x4_t
, float32_t
, 4s
, s
, f32
, q
)
18598 __ST4_LANE_FUNC (float64x2x4_t
, float64_t
, 2d
, d
, f64
, q
)
18599 __ST4_LANE_FUNC (poly8x16x4_t
, poly8_t
, 16b
, b
, p8
, q
)
18600 __ST4_LANE_FUNC (poly16x8x4_t
, poly16_t
, 8h
, h
, p16
, q
)
18601 __ST4_LANE_FUNC (int8x16x4_t
, int8_t, 16b
, b
, s8
, q
)
18602 __ST4_LANE_FUNC (int16x8x4_t
, int16_t, 8h
, h
, s16
, q
)
18603 __ST4_LANE_FUNC (int32x4x4_t
, int32_t, 4s
, s
, s32
, q
)
18604 __ST4_LANE_FUNC (int64x2x4_t
, int64_t, 2d
, d
, s64
, q
)
18605 __ST4_LANE_FUNC (uint8x16x4_t
, uint8_t, 16b
, b
, u8
, q
)
18606 __ST4_LANE_FUNC (uint16x8x4_t
, uint16_t, 8h
, h
, u16
, q
)
18607 __ST4_LANE_FUNC (uint32x4x4_t
, uint32_t, 4s
, s
, u32
, q
)
18608 __ST4_LANE_FUNC (uint64x2x4_t
, uint64_t, 2d
, d
, u64
, q
)
18610 __extension__
static __inline
int64_t __attribute__ ((__always_inline__
))
18611 vaddlv_s32 (int32x2_t a
)
18614 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result
) : "w"(a
) : );
18618 __extension__
static __inline
uint64_t __attribute__ ((__always_inline__
))
18619 vaddlv_u32 (uint32x2_t a
)
18622 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result
) : "w"(a
) : );
18626 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
18627 vaddv_s32 (int32x2_t a
)
18630 __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18634 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18635 vaddv_u32 (uint32x2_t a
)
18638 __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18642 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
18643 vmaxnmv_f32 (float32x2_t a
)
18646 __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18650 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
18651 vminnmv_f32 (float32x2_t a
)
18654 __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18658 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
18659 vmaxnmvq_f64 (float64x2_t a
)
18662 __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result
) : "w"(a
) : );
18666 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
18667 vmaxv_s32 (int32x2_t a
)
18670 __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18674 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18675 vmaxv_u32 (uint32x2_t a
)
18678 __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18682 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
18683 vminnmvq_f64 (float64x2_t a
)
18686 __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result
) : "w"(a
) : );
18690 __extension__
static __inline
int32_t __attribute__ ((__always_inline__
))
18691 vminv_s32 (int32x2_t a
)
18694 __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18698 __extension__
static __inline
uint32_t __attribute__ ((__always_inline__
))
18699 vminv_u32 (uint32x2_t a
)
18702 __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result
) : "w"(a
) : );
18706 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
18707 vpaddd_s64 (int64x2_t __a
)
18709 return __builtin_aarch64_addpdi (__a
);
18712 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
18713 vqdmulh_laneq_s16 (int16x4_t __a
, int16x8_t __b
, const int __c
)
18715 return __builtin_aarch64_sqdmulh_laneqv4hi (__a
, __b
, __c
);
18718 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
18719 vqdmulh_laneq_s32 (int32x2_t __a
, int32x4_t __b
, const int __c
)
18721 return __builtin_aarch64_sqdmulh_laneqv2si (__a
, __b
, __c
);
18724 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
18725 vqdmulhq_laneq_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
18727 return __builtin_aarch64_sqdmulh_laneqv8hi (__a
, __b
, __c
);
18730 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
18731 vqdmulhq_laneq_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
18733 return __builtin_aarch64_sqdmulh_laneqv4si (__a
, __b
, __c
);
18736 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
18737 vqrdmulh_laneq_s16 (int16x4_t __a
, int16x8_t __b
, const int __c
)
18739 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a
, __b
, __c
);
18742 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
18743 vqrdmulh_laneq_s32 (int32x2_t __a
, int32x4_t __b
, const int __c
)
18745 return __builtin_aarch64_sqrdmulh_laneqv2si (__a
, __b
, __c
);
18748 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
18749 vqrdmulhq_laneq_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
18751 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a
, __b
, __c
);
18754 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
18755 vqrdmulhq_laneq_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
18757 return __builtin_aarch64_sqrdmulh_laneqv4si (__a
, __b
, __c
);
18760 /* Table intrinsics. */
18762 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
18763 vqtbl1_p8 (poly8x16_t a
, uint8x8_t b
)
18766 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18769 : /* No clobbers */);
18773 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
18774 vqtbl1_s8 (int8x16_t a
, int8x8_t b
)
18777 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18780 : /* No clobbers */);
18784 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18785 vqtbl1_u8 (uint8x16_t a
, uint8x8_t b
)
18788 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18791 : /* No clobbers */);
18795 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
18796 vqtbl1q_p8 (poly8x16_t a
, uint8x16_t b
)
18799 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18802 : /* No clobbers */);
18806 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
18807 vqtbl1q_s8 (int8x16_t a
, int8x16_t b
)
18810 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18813 : /* No clobbers */);
18817 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18818 vqtbl1q_u8 (uint8x16_t a
, uint8x16_t b
)
18821 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18824 : /* No clobbers */);
18828 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
18829 vqtbl2_s8 (int8x16x2_t tab
, int8x8_t idx
)
18832 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18833 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18836 :"memory", "v16", "v17");
18840 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18841 vqtbl2_u8 (uint8x16x2_t tab
, uint8x8_t idx
)
18844 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18845 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18848 :"memory", "v16", "v17");
18852 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
18853 vqtbl2_p8 (poly8x16x2_t tab
, uint8x8_t idx
)
18856 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18857 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18860 :"memory", "v16", "v17");
18864 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
18865 vqtbl2q_s8 (int8x16x2_t tab
, int8x16_t idx
)
18868 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18869 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18872 :"memory", "v16", "v17");
18876 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18877 vqtbl2q_u8 (uint8x16x2_t tab
, uint8x16_t idx
)
18880 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18881 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18884 :"memory", "v16", "v17");
18888 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
18889 vqtbl2q_p8 (poly8x16x2_t tab
, uint8x16_t idx
)
18892 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18893 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18896 :"memory", "v16", "v17");
18900 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
18901 vqtbl3_s8 (int8x16x3_t tab
, int8x8_t idx
)
18904 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18905 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18908 :"memory", "v16", "v17", "v18");
18912 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18913 vqtbl3_u8 (uint8x16x3_t tab
, uint8x8_t idx
)
18916 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18917 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18920 :"memory", "v16", "v17", "v18");
18924 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
18925 vqtbl3_p8 (poly8x16x3_t tab
, uint8x8_t idx
)
18928 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18929 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18932 :"memory", "v16", "v17", "v18");
18936 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
18937 vqtbl3q_s8 (int8x16x3_t tab
, int8x16_t idx
)
18940 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18941 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18944 :"memory", "v16", "v17", "v18");
18948 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
18949 vqtbl3q_u8 (uint8x16x3_t tab
, uint8x16_t idx
)
18952 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18953 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18956 :"memory", "v16", "v17", "v18");
18960 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
18961 vqtbl3q_p8 (poly8x16x3_t tab
, uint8x16_t idx
)
18964 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18965 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18968 :"memory", "v16", "v17", "v18");
18972 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
18973 vqtbl4_s8 (int8x16x4_t tab
, int8x8_t idx
)
18976 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
18977 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
18980 :"memory", "v16", "v17", "v18", "v19");
18984 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
18985 vqtbl4_u8 (uint8x16x4_t tab
, uint8x8_t idx
)
18988 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
18989 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
18992 :"memory", "v16", "v17", "v18", "v19");
18996 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
18997 vqtbl4_p8 (poly8x16x4_t tab
, uint8x8_t idx
)
19000 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19001 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19004 :"memory", "v16", "v17", "v18", "v19");
19009 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19010 vqtbl4q_s8 (int8x16x4_t tab
, int8x16_t idx
)
19013 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19014 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19017 :"memory", "v16", "v17", "v18", "v19");
19021 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19022 vqtbl4q_u8 (uint8x16x4_t tab
, uint8x16_t idx
)
19025 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19026 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19029 :"memory", "v16", "v17", "v18", "v19");
19033 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19034 vqtbl4q_p8 (poly8x16x4_t tab
, uint8x16_t idx
)
19037 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19038 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19041 :"memory", "v16", "v17", "v18", "v19");
19046 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19047 vqtbx1_s8 (int8x8_t r
, int8x16_t tab
, int8x8_t idx
)
19049 int8x8_t result
= r
;
19050 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19052 : "w"(tab
), "w"(idx
)
19053 : /* No clobbers */);
19057 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19058 vqtbx1_u8 (uint8x8_t r
, uint8x16_t tab
, uint8x8_t idx
)
19060 uint8x8_t result
= r
;
19061 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19063 : "w"(tab
), "w"(idx
)
19064 : /* No clobbers */);
19068 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19069 vqtbx1_p8 (poly8x8_t r
, poly8x16_t tab
, uint8x8_t idx
)
19071 poly8x8_t result
= r
;
19072 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19074 : "w"(tab
), "w"(idx
)
19075 : /* No clobbers */);
19079 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19080 vqtbx1q_s8 (int8x16_t r
, int8x16_t tab
, int8x16_t idx
)
19082 int8x16_t result
= r
;
19083 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19085 : "w"(tab
), "w"(idx
)
19086 : /* No clobbers */);
19090 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19091 vqtbx1q_u8 (uint8x16_t r
, uint8x16_t tab
, uint8x16_t idx
)
19093 uint8x16_t result
= r
;
19094 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19096 : "w"(tab
), "w"(idx
)
19097 : /* No clobbers */);
19101 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19102 vqtbx1q_p8 (poly8x16_t r
, poly8x16_t tab
, uint8x16_t idx
)
19104 poly8x16_t result
= r
;
19105 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19107 : "w"(tab
), "w"(idx
)
19108 : /* No clobbers */);
19112 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19113 vqtbx2_s8 (int8x8_t r
, int8x16x2_t tab
, int8x8_t idx
)
19115 int8x8_t result
= r
;
19116 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19117 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19120 :"memory", "v16", "v17");
19124 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19125 vqtbx2_u8 (uint8x8_t r
, uint8x16x2_t tab
, uint8x8_t idx
)
19127 uint8x8_t result
= r
;
19128 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19129 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19132 :"memory", "v16", "v17");
19136 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19137 vqtbx2_p8 (poly8x8_t r
, poly8x16x2_t tab
, uint8x8_t idx
)
19139 poly8x8_t result
= r
;
19140 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19141 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19144 :"memory", "v16", "v17");
19149 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19150 vqtbx2q_s8 (int8x16_t r
, int8x16x2_t tab
, int8x16_t idx
)
19152 int8x16_t result
= r
;
19153 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19154 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19157 :"memory", "v16", "v17");
19161 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19162 vqtbx2q_u8 (uint8x16_t r
, uint8x16x2_t tab
, uint8x16_t idx
)
19164 uint8x16_t result
= r
;
19165 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19166 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19169 :"memory", "v16", "v17");
19173 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19174 vqtbx2q_p8 (poly8x16_t r
, poly8x16x2_t tab
, uint8x16_t idx
)
19176 poly8x16_t result
= r
;
19177 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19178 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19181 :"memory", "v16", "v17");
19186 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19187 vqtbx3_s8 (int8x8_t r
, int8x16x3_t tab
, int8x8_t idx
)
19189 int8x8_t result
= r
;
19190 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19191 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19194 :"memory", "v16", "v17", "v18");
19198 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19199 vqtbx3_u8 (uint8x8_t r
, uint8x16x3_t tab
, uint8x8_t idx
)
19201 uint8x8_t result
= r
;
19202 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19203 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19206 :"memory", "v16", "v17", "v18");
19210 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19211 vqtbx3_p8 (poly8x8_t r
, poly8x16x3_t tab
, uint8x8_t idx
)
19213 poly8x8_t result
= r
;
19214 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19215 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19218 :"memory", "v16", "v17", "v18");
19223 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19224 vqtbx3q_s8 (int8x16_t r
, int8x16x3_t tab
, int8x16_t idx
)
19226 int8x16_t result
= r
;
19227 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19228 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19231 :"memory", "v16", "v17", "v18");
19235 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19236 vqtbx3q_u8 (uint8x16_t r
, uint8x16x3_t tab
, uint8x16_t idx
)
19238 uint8x16_t result
= r
;
19239 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19240 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19243 :"memory", "v16", "v17", "v18");
19247 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19248 vqtbx3q_p8 (poly8x16_t r
, poly8x16x3_t tab
, uint8x16_t idx
)
19250 poly8x16_t result
= r
;
19251 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19252 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19255 :"memory", "v16", "v17", "v18");
19260 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19261 vqtbx4_s8 (int8x8_t r
, int8x16x4_t tab
, int8x8_t idx
)
19263 int8x8_t result
= r
;
19264 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19265 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19268 :"memory", "v16", "v17", "v18", "v19");
19272 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19273 vqtbx4_u8 (uint8x8_t r
, uint8x16x4_t tab
, uint8x8_t idx
)
19275 uint8x8_t result
= r
;
19276 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19277 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19280 :"memory", "v16", "v17", "v18", "v19");
19284 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19285 vqtbx4_p8 (poly8x8_t r
, poly8x16x4_t tab
, uint8x8_t idx
)
19287 poly8x8_t result
= r
;
19288 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19289 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19292 :"memory", "v16", "v17", "v18", "v19");
19297 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
19298 vqtbx4q_s8 (int8x16_t r
, int8x16x4_t tab
, int8x16_t idx
)
19300 int8x16_t result
= r
;
19301 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19302 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19305 :"memory", "v16", "v17", "v18", "v19");
19309 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19310 vqtbx4q_u8 (uint8x16_t r
, uint8x16x4_t tab
, uint8x16_t idx
)
19312 uint8x16_t result
= r
;
19313 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19314 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19317 :"memory", "v16", "v17", "v18", "v19");
19321 __extension__
static __inline poly8x16_t
__attribute__ ((__always_inline__
))
19322 vqtbx4q_p8 (poly8x16_t r
, poly8x16x4_t tab
, uint8x16_t idx
)
19324 poly8x16_t result
= r
;
19325 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19326 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19329 :"memory", "v16", "v17", "v18", "v19");
19333 /* V7 legacy table intrinsics. */
19335 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19336 vtbl1_s8 (int8x8_t tab
, int8x8_t idx
)
19339 int8x16_t temp
= vcombine_s8 (tab
, vcreate_s8 (UINT64_C (0x0)));
19340 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19342 : "w"(temp
), "w"(idx
)
19343 : /* No clobbers */);
19347 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19348 vtbl1_u8 (uint8x8_t tab
, uint8x8_t idx
)
19351 uint8x16_t temp
= vcombine_u8 (tab
, vcreate_u8 (UINT64_C (0x0)));
19352 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19354 : "w"(temp
), "w"(idx
)
19355 : /* No clobbers */);
19359 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19360 vtbl1_p8 (poly8x8_t tab
, uint8x8_t idx
)
19363 poly8x16_t temp
= vcombine_p8 (tab
, vcreate_p8 (UINT64_C (0x0)));
19364 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19366 : "w"(temp
), "w"(idx
)
19367 : /* No clobbers */);
19371 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19372 vtbl2_s8 (int8x8x2_t tab
, int8x8_t idx
)
19375 int8x16_t temp
= vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19376 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19378 : "w"(temp
), "w"(idx
)
19379 : /* No clobbers */);
19383 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19384 vtbl2_u8 (uint8x8x2_t tab
, uint8x8_t idx
)
19387 uint8x16_t temp
= vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19388 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19390 : "w"(temp
), "w"(idx
)
19391 : /* No clobbers */);
19395 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19396 vtbl2_p8 (poly8x8x2_t tab
, uint8x8_t idx
)
19399 poly8x16_t temp
= vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19400 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19402 : "w"(temp
), "w"(idx
)
19403 : /* No clobbers */);
19407 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19408 vtbl3_s8 (int8x8x3_t tab
, int8x8_t idx
)
19412 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19413 temp
.val
[1] = vcombine_s8 (tab
.val
[2], vcreate_s8 (UINT64_C (0x0)));
19414 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19415 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19417 : "Q"(temp
), "w"(idx
)
19418 : "v16", "v17", "memory");
19422 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19423 vtbl3_u8 (uint8x8x3_t tab
, uint8x8_t idx
)
19427 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19428 temp
.val
[1] = vcombine_u8 (tab
.val
[2], vcreate_u8 (UINT64_C (0x0)));
19429 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19430 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19432 : "Q"(temp
), "w"(idx
)
19433 : "v16", "v17", "memory");
19437 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19438 vtbl3_p8 (poly8x8x3_t tab
, uint8x8_t idx
)
19442 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19443 temp
.val
[1] = vcombine_p8 (tab
.val
[2], vcreate_p8 (UINT64_C (0x0)));
19444 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19445 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19447 : "Q"(temp
), "w"(idx
)
19448 : "v16", "v17", "memory");
19452 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19453 vtbl4_s8 (int8x8x4_t tab
, int8x8_t idx
)
19457 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19458 temp
.val
[1] = vcombine_s8 (tab
.val
[2], tab
.val
[3]);
19459 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19460 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19462 : "Q"(temp
), "w"(idx
)
19463 : "v16", "v17", "memory");
19467 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19468 vtbl4_u8 (uint8x8x4_t tab
, uint8x8_t idx
)
19472 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19473 temp
.val
[1] = vcombine_u8 (tab
.val
[2], tab
.val
[3]);
19474 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19475 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19477 : "Q"(temp
), "w"(idx
)
19478 : "v16", "v17", "memory");
19482 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19483 vtbl4_p8 (poly8x8x4_t tab
, uint8x8_t idx
)
19487 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19488 temp
.val
[1] = vcombine_p8 (tab
.val
[2], tab
.val
[3]);
19489 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19490 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19492 : "Q"(temp
), "w"(idx
)
19493 : "v16", "v17", "memory");
19497 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19498 vtbx1_s8 (int8x8_t r
, int8x8_t tab
, int8x8_t idx
)
19502 int8x16_t temp
= vcombine_s8 (tab
, vcreate_s8 (UINT64_C (0x0)));
19503 __asm__ ("movi %0.8b, 8\n\t"
19504 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19505 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19506 "bsl %0.8b, %4.8b, %1.8b\n\t"
19507 : "+w"(result
), "=w"(tmp1
)
19508 : "w"(temp
), "w"(idx
), "w"(r
)
19509 : /* No clobbers */);
19513 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19514 vtbx1_u8 (uint8x8_t r
, uint8x8_t tab
, uint8x8_t idx
)
19518 uint8x16_t temp
= vcombine_u8 (tab
, vcreate_u8 (UINT64_C (0x0)));
19519 __asm__ ("movi %0.8b, 8\n\t"
19520 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19521 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19522 "bsl %0.8b, %4.8b, %1.8b\n\t"
19523 : "+w"(result
), "=w"(tmp1
)
19524 : "w"(temp
), "w"(idx
), "w"(r
)
19525 : /* No clobbers */);
19529 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19530 vtbx1_p8 (poly8x8_t r
, poly8x8_t tab
, uint8x8_t idx
)
19534 poly8x16_t temp
= vcombine_p8 (tab
, vcreate_p8 (UINT64_C (0x0)));
19535 __asm__ ("movi %0.8b, 8\n\t"
19536 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19537 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19538 "bsl %0.8b, %4.8b, %1.8b\n\t"
19539 : "+w"(result
), "=w"(tmp1
)
19540 : "w"(temp
), "w"(idx
), "w"(r
)
19541 : /* No clobbers */);
19545 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19546 vtbx2_s8 (int8x8_t r
, int8x8x2_t tab
, int8x8_t idx
)
19548 int8x8_t result
= r
;
19549 int8x16_t temp
= vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19550 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19552 : "w"(temp
), "w"(idx
)
19553 : /* No clobbers */);
19557 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19558 vtbx2_u8 (uint8x8_t r
, uint8x8x2_t tab
, uint8x8_t idx
)
19560 uint8x8_t result
= r
;
19561 uint8x16_t temp
= vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19562 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19564 : "w"(temp
), "w"(idx
)
19565 : /* No clobbers */);
19569 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19570 vtbx2_p8 (poly8x8_t r
, poly8x8x2_t tab
, uint8x8_t idx
)
19572 poly8x8_t result
= r
;
19573 poly8x16_t temp
= vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19574 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19576 : "w"(temp
), "w"(idx
)
19577 : /* No clobbers */);
19581 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19582 vtbx3_s8 (int8x8_t r
, int8x8x3_t tab
, int8x8_t idx
)
19587 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19588 temp
.val
[1] = vcombine_s8 (tab
.val
[2], vcreate_s8 (UINT64_C (0x0)));
19589 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19590 "movi %0.8b, 24\n\t"
19591 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19592 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19593 "bsl %0.8b, %4.8b, %1.8b\n\t"
19594 : "+w"(result
), "=w"(tmp1
)
19595 : "Q"(temp
), "w"(idx
), "w"(r
)
19596 : "v16", "v17", "memory");
19600 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19601 vtbx3_u8 (uint8x8_t r
, uint8x8x3_t tab
, uint8x8_t idx
)
19606 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19607 temp
.val
[1] = vcombine_u8 (tab
.val
[2], vcreate_u8 (UINT64_C (0x0)));
19608 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19609 "movi %0.8b, 24\n\t"
19610 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19611 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19612 "bsl %0.8b, %4.8b, %1.8b\n\t"
19613 : "+w"(result
), "=w"(tmp1
)
19614 : "Q"(temp
), "w"(idx
), "w"(r
)
19615 : "v16", "v17", "memory");
19619 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19620 vtbx3_p8 (poly8x8_t r
, poly8x8x3_t tab
, uint8x8_t idx
)
19625 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19626 temp
.val
[1] = vcombine_p8 (tab
.val
[2], vcreate_p8 (UINT64_C (0x0)));
19627 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19628 "movi %0.8b, 24\n\t"
19629 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19630 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19631 "bsl %0.8b, %4.8b, %1.8b\n\t"
19632 : "+w"(result
), "=w"(tmp1
)
19633 : "Q"(temp
), "w"(idx
), "w"(r
)
19634 : "v16", "v17", "memory");
19638 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
19639 vtbx4_s8 (int8x8_t r
, int8x8x4_t tab
, int8x8_t idx
)
19641 int8x8_t result
= r
;
19643 temp
.val
[0] = vcombine_s8 (tab
.val
[0], tab
.val
[1]);
19644 temp
.val
[1] = vcombine_s8 (tab
.val
[2], tab
.val
[3]);
19645 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19646 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19648 : "Q"(temp
), "w"(idx
)
19649 : "v16", "v17", "memory");
19653 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19654 vtbx4_u8 (uint8x8_t r
, uint8x8x4_t tab
, uint8x8_t idx
)
19656 uint8x8_t result
= r
;
19658 temp
.val
[0] = vcombine_u8 (tab
.val
[0], tab
.val
[1]);
19659 temp
.val
[1] = vcombine_u8 (tab
.val
[2], tab
.val
[3]);
19660 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19661 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19663 : "Q"(temp
), "w"(idx
)
19664 : "v16", "v17", "memory");
19668 __extension__
static __inline poly8x8_t
__attribute__ ((__always_inline__
))
19669 vtbx4_p8 (poly8x8_t r
, poly8x8x4_t tab
, uint8x8_t idx
)
19671 poly8x8_t result
= r
;
19673 temp
.val
[0] = vcombine_p8 (tab
.val
[0], tab
.val
[1]);
19674 temp
.val
[1] = vcombine_p8 (tab
.val
[2], tab
.val
[3]);
19675 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19676 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19678 : "Q"(temp
), "w"(idx
)
19679 : "v16", "v17", "memory");
19683 /* End of temporary inline asm. */
19685 /* Start of optimal implementations in approved order. */
19689 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
19690 vabs_f32 (float32x2_t __a
)
19692 return __builtin_aarch64_absv2sf (__a
);
19695 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
19696 vabsq_f32 (float32x4_t __a
)
19698 return __builtin_aarch64_absv4sf (__a
);
19701 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
19702 vabsq_f64 (float64x2_t __a
)
19704 return __builtin_aarch64_absv2df (__a
);
19709 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
19710 vaddd_s64 (int64x1_t __a
, int64x1_t __b
)
19715 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19716 vaddd_u64 (uint64x1_t __a
, uint64x1_t __b
)
19721 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
19722 vaddv_f32 (float32x2_t __a
)
19724 float32x2_t t
= __builtin_aarch64_addvv2sf (__a
);
19725 return vget_lane_f32 (t
, 0);
19728 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
19729 vaddvq_f32 (float32x4_t __a
)
19731 float32x4_t t
= __builtin_aarch64_addvv4sf (__a
);
19732 return vgetq_lane_f32 (t
, 0);
19735 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
19736 vaddvq_f64 (float64x2_t __a
)
19738 float64x2_t t
= __builtin_aarch64_addvv2df (__a
);
19739 return vgetq_lane_f64 (t
, 0);
19744 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19745 vceq_p8 (poly8x8_t __a
, poly8x8_t __b
)
19747 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
19751 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19752 vceq_s8 (int8x8_t __a
, int8x8_t __b
)
19754 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi (__a
, __b
);
19757 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19758 vceq_s16 (int16x4_t __a
, int16x4_t __b
)
19760 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi (__a
, __b
);
19763 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19764 vceq_s32 (int32x2_t __a
, int32x2_t __b
)
19766 return (uint32x2_t
) __builtin_aarch64_cmeqv2si (__a
, __b
);
19769 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19770 vceq_s64 (int64x1_t __a
, int64x1_t __b
)
19772 return (uint64x1_t
) __builtin_aarch64_cmeqdi (__a
, __b
);
19775 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19776 vceq_u8 (uint8x8_t __a
, uint8x8_t __b
)
19778 return (uint8x8_t
) __builtin_aarch64_cmeqv8qi ((int8x8_t
) __a
,
19782 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19783 vceq_u16 (uint16x4_t __a
, uint16x4_t __b
)
19785 return (uint16x4_t
) __builtin_aarch64_cmeqv4hi ((int16x4_t
) __a
,
19789 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19790 vceq_u32 (uint32x2_t __a
, uint32x2_t __b
)
19792 return (uint32x2_t
) __builtin_aarch64_cmeqv2si ((int32x2_t
) __a
,
19796 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19797 vceq_u64 (uint64x1_t __a
, uint64x1_t __b
)
19799 return (uint64x1_t
) __builtin_aarch64_cmeqdi ((int64x1_t
) __a
,
19803 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19804 vceqq_p8 (poly8x16_t __a
, poly8x16_t __b
)
19806 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
19810 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19811 vceqq_s8 (int8x16_t __a
, int8x16_t __b
)
19813 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi (__a
, __b
);
19816 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19817 vceqq_s16 (int16x8_t __a
, int16x8_t __b
)
19819 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi (__a
, __b
);
19822 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19823 vceqq_s32 (int32x4_t __a
, int32x4_t __b
)
19825 return (uint32x4_t
) __builtin_aarch64_cmeqv4si (__a
, __b
);
19828 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19829 vceqq_s64 (int64x2_t __a
, int64x2_t __b
)
19831 return (uint64x2_t
) __builtin_aarch64_cmeqv2di (__a
, __b
);
19834 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19835 vceqq_u8 (uint8x16_t __a
, uint8x16_t __b
)
19837 return (uint8x16_t
) __builtin_aarch64_cmeqv16qi ((int8x16_t
) __a
,
19841 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19842 vceqq_u16 (uint16x8_t __a
, uint16x8_t __b
)
19844 return (uint16x8_t
) __builtin_aarch64_cmeqv8hi ((int16x8_t
) __a
,
19848 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19849 vceqq_u32 (uint32x4_t __a
, uint32x4_t __b
)
19851 return (uint32x4_t
) __builtin_aarch64_cmeqv4si ((int32x4_t
) __a
,
19855 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19856 vceqq_u64 (uint64x2_t __a
, uint64x2_t __b
)
19858 return (uint64x2_t
) __builtin_aarch64_cmeqv2di ((int64x2_t
) __a
,
19862 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19863 vceqd_s64 (int64x1_t __a
, int64x1_t __b
)
19865 return (uint64x1_t
) __builtin_aarch64_cmeqdi (__a
, __b
);
19868 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19869 vceqd_u64 (uint64x1_t __a
, uint64x1_t __b
)
19871 return (uint64x1_t
) __builtin_aarch64_cmeqdi (__a
, __b
);
19874 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19875 vceqzd_s64 (int64x1_t __a
)
19877 return (uint64x1_t
) __builtin_aarch64_cmeqdi (__a
, 0);
19882 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19883 vcge_s8 (int8x8_t __a
, int8x8_t __b
)
19885 return (uint8x8_t
) __builtin_aarch64_cmgev8qi (__a
, __b
);
19888 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19889 vcge_s16 (int16x4_t __a
, int16x4_t __b
)
19891 return (uint16x4_t
) __builtin_aarch64_cmgev4hi (__a
, __b
);
19894 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19895 vcge_s32 (int32x2_t __a
, int32x2_t __b
)
19897 return (uint32x2_t
) __builtin_aarch64_cmgev2si (__a
, __b
);
19900 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19901 vcge_s64 (int64x1_t __a
, int64x1_t __b
)
19903 return (uint64x1_t
) __builtin_aarch64_cmgedi (__a
, __b
);
19906 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
19907 vcge_u8 (uint8x8_t __a
, uint8x8_t __b
)
19909 return (uint8x8_t
) __builtin_aarch64_cmhsv8qi ((int8x8_t
) __a
,
19913 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
19914 vcge_u16 (uint16x4_t __a
, uint16x4_t __b
)
19916 return (uint16x4_t
) __builtin_aarch64_cmhsv4hi ((int16x4_t
) __a
,
19920 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
19921 vcge_u32 (uint32x2_t __a
, uint32x2_t __b
)
19923 return (uint32x2_t
) __builtin_aarch64_cmhsv2si ((int32x2_t
) __a
,
19927 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19928 vcge_u64 (uint64x1_t __a
, uint64x1_t __b
)
19930 return (uint64x1_t
) __builtin_aarch64_cmhsdi ((int64x1_t
) __a
,
19934 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19935 vcgeq_s8 (int8x16_t __a
, int8x16_t __b
)
19937 return (uint8x16_t
) __builtin_aarch64_cmgev16qi (__a
, __b
);
19940 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19941 vcgeq_s16 (int16x8_t __a
, int16x8_t __b
)
19943 return (uint16x8_t
) __builtin_aarch64_cmgev8hi (__a
, __b
);
19946 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19947 vcgeq_s32 (int32x4_t __a
, int32x4_t __b
)
19949 return (uint32x4_t
) __builtin_aarch64_cmgev4si (__a
, __b
);
19952 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19953 vcgeq_s64 (int64x2_t __a
, int64x2_t __b
)
19955 return (uint64x2_t
) __builtin_aarch64_cmgev2di (__a
, __b
);
19958 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
19959 vcgeq_u8 (uint8x16_t __a
, uint8x16_t __b
)
19961 return (uint8x16_t
) __builtin_aarch64_cmhsv16qi ((int8x16_t
) __a
,
19965 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
19966 vcgeq_u16 (uint16x8_t __a
, uint16x8_t __b
)
19968 return (uint16x8_t
) __builtin_aarch64_cmhsv8hi ((int16x8_t
) __a
,
19972 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
19973 vcgeq_u32 (uint32x4_t __a
, uint32x4_t __b
)
19975 return (uint32x4_t
) __builtin_aarch64_cmhsv4si ((int32x4_t
) __a
,
19979 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
19980 vcgeq_u64 (uint64x2_t __a
, uint64x2_t __b
)
19982 return (uint64x2_t
) __builtin_aarch64_cmhsv2di ((int64x2_t
) __a
,
19986 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19987 vcged_s64 (int64x1_t __a
, int64x1_t __b
)
19989 return (uint64x1_t
) __builtin_aarch64_cmgedi (__a
, __b
);
19992 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
19993 vcged_u64 (uint64x1_t __a
, uint64x1_t __b
)
19995 return (uint64x1_t
) __builtin_aarch64_cmhsdi ((int64x1_t
) __a
,
19999 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20000 vcgezd_s64 (int64x1_t __a
)
20002 return (uint64x1_t
) __builtin_aarch64_cmgedi (__a
, 0);
20007 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20008 vcgt_s8 (int8x8_t __a
, int8x8_t __b
)
20010 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi (__a
, __b
);
20013 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20014 vcgt_s16 (int16x4_t __a
, int16x4_t __b
)
20016 return (uint16x4_t
) __builtin_aarch64_cmgtv4hi (__a
, __b
);
20019 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20020 vcgt_s32 (int32x2_t __a
, int32x2_t __b
)
20022 return (uint32x2_t
) __builtin_aarch64_cmgtv2si (__a
, __b
);
20025 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20026 vcgt_s64 (int64x1_t __a
, int64x1_t __b
)
20028 return (uint64x1_t
) __builtin_aarch64_cmgtdi (__a
, __b
);
20031 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20032 vcgt_u8 (uint8x8_t __a
, uint8x8_t __b
)
20034 return (uint8x8_t
) __builtin_aarch64_cmhiv8qi ((int8x8_t
) __a
,
20038 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20039 vcgt_u16 (uint16x4_t __a
, uint16x4_t __b
)
20041 return (uint16x4_t
) __builtin_aarch64_cmhiv4hi ((int16x4_t
) __a
,
20045 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20046 vcgt_u32 (uint32x2_t __a
, uint32x2_t __b
)
20048 return (uint32x2_t
) __builtin_aarch64_cmhiv2si ((int32x2_t
) __a
,
20052 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20053 vcgt_u64 (uint64x1_t __a
, uint64x1_t __b
)
20055 return (uint64x1_t
) __builtin_aarch64_cmhidi ((int64x1_t
) __a
,
20059 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20060 vcgtq_s8 (int8x16_t __a
, int8x16_t __b
)
20062 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi (__a
, __b
);
20065 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20066 vcgtq_s16 (int16x8_t __a
, int16x8_t __b
)
20068 return (uint16x8_t
) __builtin_aarch64_cmgtv8hi (__a
, __b
);
20071 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20072 vcgtq_s32 (int32x4_t __a
, int32x4_t __b
)
20074 return (uint32x4_t
) __builtin_aarch64_cmgtv4si (__a
, __b
);
20077 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20078 vcgtq_s64 (int64x2_t __a
, int64x2_t __b
)
20080 return (uint64x2_t
) __builtin_aarch64_cmgtv2di (__a
, __b
);
20083 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20084 vcgtq_u8 (uint8x16_t __a
, uint8x16_t __b
)
20086 return (uint8x16_t
) __builtin_aarch64_cmhiv16qi ((int8x16_t
) __a
,
20090 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20091 vcgtq_u16 (uint16x8_t __a
, uint16x8_t __b
)
20093 return (uint16x8_t
) __builtin_aarch64_cmhiv8hi ((int16x8_t
) __a
,
20097 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20098 vcgtq_u32 (uint32x4_t __a
, uint32x4_t __b
)
20100 return (uint32x4_t
) __builtin_aarch64_cmhiv4si ((int32x4_t
) __a
,
20104 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20105 vcgtq_u64 (uint64x2_t __a
, uint64x2_t __b
)
20107 return (uint64x2_t
) __builtin_aarch64_cmhiv2di ((int64x2_t
) __a
,
20111 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20112 vcgtd_s64 (int64x1_t __a
, int64x1_t __b
)
20114 return (uint64x1_t
) __builtin_aarch64_cmgtdi (__a
, __b
);
20117 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20118 vcgtd_u64 (uint64x1_t __a
, uint64x1_t __b
)
20120 return (uint64x1_t
) __builtin_aarch64_cmhidi ((int64x1_t
) __a
,
20124 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20125 vcgtzd_s64 (int64x1_t __a
)
20127 return (uint64x1_t
) __builtin_aarch64_cmgtdi (__a
, 0);
20132 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20133 vcle_s8 (int8x8_t __a
, int8x8_t __b
)
20135 return (uint8x8_t
) __builtin_aarch64_cmgev8qi (__b
, __a
);
20138 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20139 vcle_s16 (int16x4_t __a
, int16x4_t __b
)
20141 return (uint16x4_t
) __builtin_aarch64_cmgev4hi (__b
, __a
);
20144 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20145 vcle_s32 (int32x2_t __a
, int32x2_t __b
)
20147 return (uint32x2_t
) __builtin_aarch64_cmgev2si (__b
, __a
);
20150 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20151 vcle_s64 (int64x1_t __a
, int64x1_t __b
)
20153 return (uint64x1_t
) __builtin_aarch64_cmgedi (__b
, __a
);
20156 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20157 vcle_u8 (uint8x8_t __a
, uint8x8_t __b
)
20159 return (uint8x8_t
) __builtin_aarch64_cmhsv8qi ((int8x8_t
) __b
,
20163 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20164 vcle_u16 (uint16x4_t __a
, uint16x4_t __b
)
20166 return (uint16x4_t
) __builtin_aarch64_cmhsv4hi ((int16x4_t
) __b
,
20170 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20171 vcle_u32 (uint32x2_t __a
, uint32x2_t __b
)
20173 return (uint32x2_t
) __builtin_aarch64_cmhsv2si ((int32x2_t
) __b
,
20177 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20178 vcle_u64 (uint64x1_t __a
, uint64x1_t __b
)
20180 return (uint64x1_t
) __builtin_aarch64_cmhsdi ((int64x1_t
) __b
,
20184 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20185 vcleq_s8 (int8x16_t __a
, int8x16_t __b
)
20187 return (uint8x16_t
) __builtin_aarch64_cmgev16qi (__b
, __a
);
20190 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20191 vcleq_s16 (int16x8_t __a
, int16x8_t __b
)
20193 return (uint16x8_t
) __builtin_aarch64_cmgev8hi (__b
, __a
);
20196 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20197 vcleq_s32 (int32x4_t __a
, int32x4_t __b
)
20199 return (uint32x4_t
) __builtin_aarch64_cmgev4si (__b
, __a
);
20202 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20203 vcleq_s64 (int64x2_t __a
, int64x2_t __b
)
20205 return (uint64x2_t
) __builtin_aarch64_cmgev2di (__b
, __a
);
20208 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20209 vcleq_u8 (uint8x16_t __a
, uint8x16_t __b
)
20211 return (uint8x16_t
) __builtin_aarch64_cmhsv16qi ((int8x16_t
) __b
,
20215 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20216 vcleq_u16 (uint16x8_t __a
, uint16x8_t __b
)
20218 return (uint16x8_t
) __builtin_aarch64_cmhsv8hi ((int16x8_t
) __b
,
20222 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20223 vcleq_u32 (uint32x4_t __a
, uint32x4_t __b
)
20225 return (uint32x4_t
) __builtin_aarch64_cmhsv4si ((int32x4_t
) __b
,
20229 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20230 vcleq_u64 (uint64x2_t __a
, uint64x2_t __b
)
20232 return (uint64x2_t
) __builtin_aarch64_cmhsv2di ((int64x2_t
) __b
,
20236 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20237 vcled_s64 (int64x1_t __a
, int64x1_t __b
)
20239 return (uint64x1_t
) __builtin_aarch64_cmgedi (__b
, __a
);
20242 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20243 vclezd_s64 (int64x1_t __a
)
20245 return (uint64x1_t
) __builtin_aarch64_cmledi (__a
, 0);
20250 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20251 vclt_s8 (int8x8_t __a
, int8x8_t __b
)
20253 return (uint8x8_t
) __builtin_aarch64_cmgtv8qi (__b
, __a
);
20256 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20257 vclt_s16 (int16x4_t __a
, int16x4_t __b
)
20259 return (uint16x4_t
) __builtin_aarch64_cmgtv4hi (__b
, __a
);
20262 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20263 vclt_s32 (int32x2_t __a
, int32x2_t __b
)
20265 return (uint32x2_t
) __builtin_aarch64_cmgtv2si (__b
, __a
);
20268 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20269 vclt_s64 (int64x1_t __a
, int64x1_t __b
)
20271 return (uint64x1_t
) __builtin_aarch64_cmgtdi (__b
, __a
);
20274 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
20275 vclt_u8 (uint8x8_t __a
, uint8x8_t __b
)
20277 return (uint8x8_t
) __builtin_aarch64_cmhiv8qi ((int8x8_t
) __b
,
20281 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
20282 vclt_u16 (uint16x4_t __a
, uint16x4_t __b
)
20284 return (uint16x4_t
) __builtin_aarch64_cmhiv4hi ((int16x4_t
) __b
,
20288 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
20289 vclt_u32 (uint32x2_t __a
, uint32x2_t __b
)
20291 return (uint32x2_t
) __builtin_aarch64_cmhiv2si ((int32x2_t
) __b
,
20295 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20296 vclt_u64 (uint64x1_t __a
, uint64x1_t __b
)
20298 return (uint64x1_t
) __builtin_aarch64_cmhidi ((int64x1_t
) __b
,
20302 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20303 vcltq_s8 (int8x16_t __a
, int8x16_t __b
)
20305 return (uint8x16_t
) __builtin_aarch64_cmgtv16qi (__b
, __a
);
20308 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20309 vcltq_s16 (int16x8_t __a
, int16x8_t __b
)
20311 return (uint16x8_t
) __builtin_aarch64_cmgtv8hi (__b
, __a
);
20314 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20315 vcltq_s32 (int32x4_t __a
, int32x4_t __b
)
20317 return (uint32x4_t
) __builtin_aarch64_cmgtv4si (__b
, __a
);
20320 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20321 vcltq_s64 (int64x2_t __a
, int64x2_t __b
)
20323 return (uint64x2_t
) __builtin_aarch64_cmgtv2di (__b
, __a
);
20326 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
20327 vcltq_u8 (uint8x16_t __a
, uint8x16_t __b
)
20329 return (uint8x16_t
) __builtin_aarch64_cmhiv16qi ((int8x16_t
) __b
,
20333 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
20334 vcltq_u16 (uint16x8_t __a
, uint16x8_t __b
)
20336 return (uint16x8_t
) __builtin_aarch64_cmhiv8hi ((int16x8_t
) __b
,
20340 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
20341 vcltq_u32 (uint32x4_t __a
, uint32x4_t __b
)
20343 return (uint32x4_t
) __builtin_aarch64_cmhiv4si ((int32x4_t
) __b
,
20347 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
20348 vcltq_u64 (uint64x2_t __a
, uint64x2_t __b
)
20350 return (uint64x2_t
) __builtin_aarch64_cmhiv2di ((int64x2_t
) __b
,
20354 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20355 vcltd_s64 (int64x1_t __a
, int64x1_t __b
)
20357 return (uint64x1_t
) __builtin_aarch64_cmgtdi (__b
, __a
);
20360 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20361 vcltzd_s64 (int64x1_t __a
)
20363 return (uint64x1_t
) __builtin_aarch64_cmltdi (__a
, 0);
20368 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
20369 vdupb_lane_s8 (int8x16_t a
, int const b
)
20371 return __builtin_aarch64_dup_laneqi (a
, b
);
20374 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
20375 vdupb_lane_u8 (uint8x16_t a
, int const b
)
20377 return (uint8x1_t
) __builtin_aarch64_dup_laneqi ((int8x16_t
) a
, b
);
20380 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
20381 vduph_lane_s16 (int16x8_t a
, int const b
)
20383 return __builtin_aarch64_dup_lanehi (a
, b
);
20386 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
20387 vduph_lane_u16 (uint16x8_t a
, int const b
)
20389 return (uint16x1_t
) __builtin_aarch64_dup_lanehi ((int16x8_t
) a
, b
);
20392 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
20393 vdups_lane_s32 (int32x4_t a
, int const b
)
20395 return __builtin_aarch64_dup_lanesi (a
, b
);
20398 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
20399 vdups_lane_u32 (uint32x4_t a
, int const b
)
20401 return (uint32x1_t
) __builtin_aarch64_dup_lanesi ((int32x4_t
) a
, b
);
20404 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
20405 vdupd_lane_s64 (int64x2_t a
, int const b
)
20407 return __builtin_aarch64_dup_lanedi (a
, b
);
20410 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
20411 vdupd_lane_u64 (uint64x2_t a
, int const b
)
20413 return (uint64x1_t
) __builtin_aarch64_dup_lanedi ((int64x2_t
) a
, b
);
20418 __extension__
static __inline int64x1x2_t
__attribute__ ((__always_inline__
))
20419 vld2_s64 (const int64_t * __a
)
20422 __builtin_aarch64_simd_oi __o
;
20423 __o
= __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di
*) __a
);
20424 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 0);
20425 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 1);
20429 __extension__
static __inline uint64x1x2_t
__attribute__ ((__always_inline__
))
20430 vld2_u64 (const uint64_t * __a
)
20433 __builtin_aarch64_simd_oi __o
;
20434 __o
= __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di
*) __a
);
20435 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 0);
20436 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregoidi (__o
, 1);
20440 __extension__
static __inline float64x1x2_t
__attribute__ ((__always_inline__
))
20441 vld2_f64 (const float64_t
* __a
)
20444 __builtin_aarch64_simd_oi __o
;
20445 __o
= __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df
*) __a
);
20446 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregoidf (__o
, 0);
20447 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregoidf (__o
, 1);
20451 __extension__
static __inline int8x8x2_t
__attribute__ ((__always_inline__
))
20452 vld2_s8 (const int8_t * __a
)
20455 __builtin_aarch64_simd_oi __o
;
20456 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20457 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20458 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20462 __extension__
static __inline poly8x8x2_t
__attribute__ ((__always_inline__
))
20463 vld2_p8 (const poly8_t
* __a
)
20466 __builtin_aarch64_simd_oi __o
;
20467 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20468 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20469 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20473 __extension__
static __inline int16x4x2_t
__attribute__ ((__always_inline__
))
20474 vld2_s16 (const int16_t * __a
)
20477 __builtin_aarch64_simd_oi __o
;
20478 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20479 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20480 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20484 __extension__
static __inline poly16x4x2_t
__attribute__ ((__always_inline__
))
20485 vld2_p16 (const poly16_t
* __a
)
20488 __builtin_aarch64_simd_oi __o
;
20489 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20490 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20491 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20495 __extension__
static __inline int32x2x2_t
__attribute__ ((__always_inline__
))
20496 vld2_s32 (const int32_t * __a
)
20499 __builtin_aarch64_simd_oi __o
;
20500 __o
= __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si
*) __a
);
20501 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 0);
20502 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 1);
20506 __extension__
static __inline uint8x8x2_t
__attribute__ ((__always_inline__
))
20507 vld2_u8 (const uint8_t * __a
)
20510 __builtin_aarch64_simd_oi __o
;
20511 __o
= __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20512 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 0);
20513 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregoiv8qi (__o
, 1);
20517 __extension__
static __inline uint16x4x2_t
__attribute__ ((__always_inline__
))
20518 vld2_u16 (const uint16_t * __a
)
20521 __builtin_aarch64_simd_oi __o
;
20522 __o
= __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20523 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 0);
20524 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregoiv4hi (__o
, 1);
20528 __extension__
static __inline uint32x2x2_t
__attribute__ ((__always_inline__
))
20529 vld2_u32 (const uint32_t * __a
)
20532 __builtin_aarch64_simd_oi __o
;
20533 __o
= __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si
*) __a
);
20534 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 0);
20535 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregoiv2si (__o
, 1);
20539 __extension__
static __inline float32x2x2_t
__attribute__ ((__always_inline__
))
20540 vld2_f32 (const float32_t
* __a
)
20543 __builtin_aarch64_simd_oi __o
;
20544 __o
= __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
20545 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregoiv2sf (__o
, 0);
20546 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregoiv2sf (__o
, 1);
20550 __extension__
static __inline int8x16x2_t
__attribute__ ((__always_inline__
))
20551 vld2q_s8 (const int8_t * __a
)
20554 __builtin_aarch64_simd_oi __o
;
20555 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20556 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20557 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20561 __extension__
static __inline poly8x16x2_t
__attribute__ ((__always_inline__
))
20562 vld2q_p8 (const poly8_t
* __a
)
20565 __builtin_aarch64_simd_oi __o
;
20566 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20567 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20568 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20572 __extension__
static __inline int16x8x2_t
__attribute__ ((__always_inline__
))
20573 vld2q_s16 (const int16_t * __a
)
20576 __builtin_aarch64_simd_oi __o
;
20577 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20578 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20579 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20583 __extension__
static __inline poly16x8x2_t
__attribute__ ((__always_inline__
))
20584 vld2q_p16 (const poly16_t
* __a
)
20587 __builtin_aarch64_simd_oi __o
;
20588 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20589 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20590 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20594 __extension__
static __inline int32x4x2_t
__attribute__ ((__always_inline__
))
20595 vld2q_s32 (const int32_t * __a
)
20598 __builtin_aarch64_simd_oi __o
;
20599 __o
= __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si
*) __a
);
20600 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 0);
20601 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 1);
20605 __extension__
static __inline int64x2x2_t
__attribute__ ((__always_inline__
))
20606 vld2q_s64 (const int64_t * __a
)
20609 __builtin_aarch64_simd_oi __o
;
20610 __o
= __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di
*) __a
);
20611 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 0);
20612 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 1);
20616 __extension__
static __inline uint8x16x2_t
__attribute__ ((__always_inline__
))
20617 vld2q_u8 (const uint8_t * __a
)
20620 __builtin_aarch64_simd_oi __o
;
20621 __o
= __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20622 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 0);
20623 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregoiv16qi (__o
, 1);
20627 __extension__
static __inline uint16x8x2_t
__attribute__ ((__always_inline__
))
20628 vld2q_u16 (const uint16_t * __a
)
20631 __builtin_aarch64_simd_oi __o
;
20632 __o
= __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20633 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 0);
20634 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregoiv8hi (__o
, 1);
20638 __extension__
static __inline uint32x4x2_t
__attribute__ ((__always_inline__
))
20639 vld2q_u32 (const uint32_t * __a
)
20642 __builtin_aarch64_simd_oi __o
;
20643 __o
= __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si
*) __a
);
20644 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 0);
20645 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregoiv4si (__o
, 1);
20649 __extension__
static __inline uint64x2x2_t
__attribute__ ((__always_inline__
))
20650 vld2q_u64 (const uint64_t * __a
)
20653 __builtin_aarch64_simd_oi __o
;
20654 __o
= __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di
*) __a
);
20655 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 0);
20656 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregoiv2di (__o
, 1);
20660 __extension__
static __inline float32x4x2_t
__attribute__ ((__always_inline__
))
20661 vld2q_f32 (const float32_t
* __a
)
20664 __builtin_aarch64_simd_oi __o
;
20665 __o
= __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
20666 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregoiv4sf (__o
, 0);
20667 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregoiv4sf (__o
, 1);
20671 __extension__
static __inline float64x2x2_t
__attribute__ ((__always_inline__
))
20672 vld2q_f64 (const float64_t
* __a
)
20675 __builtin_aarch64_simd_oi __o
;
20676 __o
= __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df
*) __a
);
20677 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregoiv2df (__o
, 0);
20678 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregoiv2df (__o
, 1);
20682 __extension__
static __inline int64x1x3_t
__attribute__ ((__always_inline__
))
20683 vld3_s64 (const int64_t * __a
)
20686 __builtin_aarch64_simd_ci __o
;
20687 __o
= __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di
*) __a
);
20688 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 0);
20689 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 1);
20690 ret
.val
[2] = (int64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 2);
20694 __extension__
static __inline uint64x1x3_t
__attribute__ ((__always_inline__
))
20695 vld3_u64 (const uint64_t * __a
)
20698 __builtin_aarch64_simd_ci __o
;
20699 __o
= __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di
*) __a
);
20700 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 0);
20701 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 1);
20702 ret
.val
[2] = (uint64x1_t
) __builtin_aarch64_get_dregcidi (__o
, 2);
20706 __extension__
static __inline float64x1x3_t
__attribute__ ((__always_inline__
))
20707 vld3_f64 (const float64_t
* __a
)
20710 __builtin_aarch64_simd_ci __o
;
20711 __o
= __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df
*) __a
);
20712 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 0);
20713 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 1);
20714 ret
.val
[2] = (float64x1_t
) __builtin_aarch64_get_dregcidf (__o
, 2);
20718 __extension__
static __inline int8x8x3_t
__attribute__ ((__always_inline__
))
20719 vld3_s8 (const int8_t * __a
)
20722 __builtin_aarch64_simd_ci __o
;
20723 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20724 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20725 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20726 ret
.val
[2] = (int8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20730 __extension__
static __inline poly8x8x3_t
__attribute__ ((__always_inline__
))
20731 vld3_p8 (const poly8_t
* __a
)
20734 __builtin_aarch64_simd_ci __o
;
20735 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20736 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20737 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20738 ret
.val
[2] = (poly8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20742 __extension__
static __inline int16x4x3_t
__attribute__ ((__always_inline__
))
20743 vld3_s16 (const int16_t * __a
)
20746 __builtin_aarch64_simd_ci __o
;
20747 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20748 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20749 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20750 ret
.val
[2] = (int16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20754 __extension__
static __inline poly16x4x3_t
__attribute__ ((__always_inline__
))
20755 vld3_p16 (const poly16_t
* __a
)
20758 __builtin_aarch64_simd_ci __o
;
20759 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20760 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20761 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20762 ret
.val
[2] = (poly16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20766 __extension__
static __inline int32x2x3_t
__attribute__ ((__always_inline__
))
20767 vld3_s32 (const int32_t * __a
)
20770 __builtin_aarch64_simd_ci __o
;
20771 __o
= __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si
*) __a
);
20772 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 0);
20773 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 1);
20774 ret
.val
[2] = (int32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 2);
20778 __extension__
static __inline uint8x8x3_t
__attribute__ ((__always_inline__
))
20779 vld3_u8 (const uint8_t * __a
)
20782 __builtin_aarch64_simd_ci __o
;
20783 __o
= __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
20784 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 0);
20785 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 1);
20786 ret
.val
[2] = (uint8x8_t
) __builtin_aarch64_get_dregciv8qi (__o
, 2);
20790 __extension__
static __inline uint16x4x3_t
__attribute__ ((__always_inline__
))
20791 vld3_u16 (const uint16_t * __a
)
20794 __builtin_aarch64_simd_ci __o
;
20795 __o
= __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
20796 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 0);
20797 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 1);
20798 ret
.val
[2] = (uint16x4_t
) __builtin_aarch64_get_dregciv4hi (__o
, 2);
20802 __extension__
static __inline uint32x2x3_t
__attribute__ ((__always_inline__
))
20803 vld3_u32 (const uint32_t * __a
)
20806 __builtin_aarch64_simd_ci __o
;
20807 __o
= __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si
*) __a
);
20808 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 0);
20809 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 1);
20810 ret
.val
[2] = (uint32x2_t
) __builtin_aarch64_get_dregciv2si (__o
, 2);
20814 __extension__
static __inline float32x2x3_t
__attribute__ ((__always_inline__
))
20815 vld3_f32 (const float32_t
* __a
)
20818 __builtin_aarch64_simd_ci __o
;
20819 __o
= __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
20820 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 0);
20821 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 1);
20822 ret
.val
[2] = (float32x2_t
) __builtin_aarch64_get_dregciv2sf (__o
, 2);
20826 __extension__
static __inline int8x16x3_t
__attribute__ ((__always_inline__
))
20827 vld3q_s8 (const int8_t * __a
)
20830 __builtin_aarch64_simd_ci __o
;
20831 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20832 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20833 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20834 ret
.val
[2] = (int8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20838 __extension__
static __inline poly8x16x3_t
__attribute__ ((__always_inline__
))
20839 vld3q_p8 (const poly8_t
* __a
)
20842 __builtin_aarch64_simd_ci __o
;
20843 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20844 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20845 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20846 ret
.val
[2] = (poly8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20850 __extension__
static __inline int16x8x3_t
__attribute__ ((__always_inline__
))
20851 vld3q_s16 (const int16_t * __a
)
20854 __builtin_aarch64_simd_ci __o
;
20855 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20856 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20857 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20858 ret
.val
[2] = (int16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20862 __extension__
static __inline poly16x8x3_t
__attribute__ ((__always_inline__
))
20863 vld3q_p16 (const poly16_t
* __a
)
20866 __builtin_aarch64_simd_ci __o
;
20867 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20868 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20869 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20870 ret
.val
[2] = (poly16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20874 __extension__
static __inline int32x4x3_t
__attribute__ ((__always_inline__
))
20875 vld3q_s32 (const int32_t * __a
)
20878 __builtin_aarch64_simd_ci __o
;
20879 __o
= __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si
*) __a
);
20880 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 0);
20881 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 1);
20882 ret
.val
[2] = (int32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 2);
20886 __extension__
static __inline int64x2x3_t
__attribute__ ((__always_inline__
))
20887 vld3q_s64 (const int64_t * __a
)
20890 __builtin_aarch64_simd_ci __o
;
20891 __o
= __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di
*) __a
);
20892 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 0);
20893 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 1);
20894 ret
.val
[2] = (int64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 2);
20898 __extension__
static __inline uint8x16x3_t
__attribute__ ((__always_inline__
))
20899 vld3q_u8 (const uint8_t * __a
)
20902 __builtin_aarch64_simd_ci __o
;
20903 __o
= __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
20904 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 0);
20905 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 1);
20906 ret
.val
[2] = (uint8x16_t
) __builtin_aarch64_get_qregciv16qi (__o
, 2);
20910 __extension__
static __inline uint16x8x3_t
__attribute__ ((__always_inline__
))
20911 vld3q_u16 (const uint16_t * __a
)
20914 __builtin_aarch64_simd_ci __o
;
20915 __o
= __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
20916 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 0);
20917 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 1);
20918 ret
.val
[2] = (uint16x8_t
) __builtin_aarch64_get_qregciv8hi (__o
, 2);
20922 __extension__
static __inline uint32x4x3_t
__attribute__ ((__always_inline__
))
20923 vld3q_u32 (const uint32_t * __a
)
20926 __builtin_aarch64_simd_ci __o
;
20927 __o
= __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si
*) __a
);
20928 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 0);
20929 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 1);
20930 ret
.val
[2] = (uint32x4_t
) __builtin_aarch64_get_qregciv4si (__o
, 2);
20934 __extension__
static __inline uint64x2x3_t
__attribute__ ((__always_inline__
))
20935 vld3q_u64 (const uint64_t * __a
)
20938 __builtin_aarch64_simd_ci __o
;
20939 __o
= __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di
*) __a
);
20940 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 0);
20941 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 1);
20942 ret
.val
[2] = (uint64x2_t
) __builtin_aarch64_get_qregciv2di (__o
, 2);
20946 __extension__
static __inline float32x4x3_t
__attribute__ ((__always_inline__
))
20947 vld3q_f32 (const float32_t
* __a
)
20950 __builtin_aarch64_simd_ci __o
;
20951 __o
= __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
20952 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 0);
20953 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 1);
20954 ret
.val
[2] = (float32x4_t
) __builtin_aarch64_get_qregciv4sf (__o
, 2);
20958 __extension__
static __inline float64x2x3_t
__attribute__ ((__always_inline__
))
20959 vld3q_f64 (const float64_t
* __a
)
20962 __builtin_aarch64_simd_ci __o
;
20963 __o
= __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df
*) __a
);
20964 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 0);
20965 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 1);
20966 ret
.val
[2] = (float64x2_t
) __builtin_aarch64_get_qregciv2df (__o
, 2);
20970 __extension__
static __inline int64x1x4_t
__attribute__ ((__always_inline__
))
20971 vld4_s64 (const int64_t * __a
)
20974 __builtin_aarch64_simd_xi __o
;
20975 __o
= __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di
*) __a
);
20976 ret
.val
[0] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 0);
20977 ret
.val
[1] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 1);
20978 ret
.val
[2] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 2);
20979 ret
.val
[3] = (int64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 3);
20983 __extension__
static __inline uint64x1x4_t
__attribute__ ((__always_inline__
))
20984 vld4_u64 (const uint64_t * __a
)
20987 __builtin_aarch64_simd_xi __o
;
20988 __o
= __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di
*) __a
);
20989 ret
.val
[0] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 0);
20990 ret
.val
[1] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 1);
20991 ret
.val
[2] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 2);
20992 ret
.val
[3] = (uint64x1_t
) __builtin_aarch64_get_dregxidi (__o
, 3);
20996 __extension__
static __inline float64x1x4_t
__attribute__ ((__always_inline__
))
20997 vld4_f64 (const float64_t
* __a
)
21000 __builtin_aarch64_simd_xi __o
;
21001 __o
= __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df
*) __a
);
21002 ret
.val
[0] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 0);
21003 ret
.val
[1] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 1);
21004 ret
.val
[2] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 2);
21005 ret
.val
[3] = (float64x1_t
) __builtin_aarch64_get_dregxidf (__o
, 3);
21009 __extension__
static __inline int8x8x4_t
__attribute__ ((__always_inline__
))
21010 vld4_s8 (const int8_t * __a
)
21013 __builtin_aarch64_simd_xi __o
;
21014 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
21015 ret
.val
[0] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
21016 ret
.val
[1] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
21017 ret
.val
[2] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
21018 ret
.val
[3] = (int8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
21022 __extension__
static __inline poly8x8x4_t
__attribute__ ((__always_inline__
))
21023 vld4_p8 (const poly8_t
* __a
)
21026 __builtin_aarch64_simd_xi __o
;
21027 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
21028 ret
.val
[0] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
21029 ret
.val
[1] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
21030 ret
.val
[2] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
21031 ret
.val
[3] = (poly8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
21035 __extension__
static __inline int16x4x4_t
__attribute__ ((__always_inline__
))
21036 vld4_s16 (const int16_t * __a
)
21039 __builtin_aarch64_simd_xi __o
;
21040 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
21041 ret
.val
[0] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
21042 ret
.val
[1] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
21043 ret
.val
[2] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
21044 ret
.val
[3] = (int16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
21048 __extension__
static __inline poly16x4x4_t
__attribute__ ((__always_inline__
))
21049 vld4_p16 (const poly16_t
* __a
)
21052 __builtin_aarch64_simd_xi __o
;
21053 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
21054 ret
.val
[0] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
21055 ret
.val
[1] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
21056 ret
.val
[2] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
21057 ret
.val
[3] = (poly16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
21061 __extension__
static __inline int32x2x4_t
__attribute__ ((__always_inline__
))
21062 vld4_s32 (const int32_t * __a
)
21065 __builtin_aarch64_simd_xi __o
;
21066 __o
= __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si
*) __a
);
21067 ret
.val
[0] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 0);
21068 ret
.val
[1] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 1);
21069 ret
.val
[2] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 2);
21070 ret
.val
[3] = (int32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 3);
21074 __extension__
static __inline uint8x8x4_t
__attribute__ ((__always_inline__
))
21075 vld4_u8 (const uint8_t * __a
)
21078 __builtin_aarch64_simd_xi __o
;
21079 __o
= __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi
*) __a
);
21080 ret
.val
[0] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 0);
21081 ret
.val
[1] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 1);
21082 ret
.val
[2] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 2);
21083 ret
.val
[3] = (uint8x8_t
) __builtin_aarch64_get_dregxiv8qi (__o
, 3);
21087 __extension__
static __inline uint16x4x4_t
__attribute__ ((__always_inline__
))
21088 vld4_u16 (const uint16_t * __a
)
21091 __builtin_aarch64_simd_xi __o
;
21092 __o
= __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi
*) __a
);
21093 ret
.val
[0] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 0);
21094 ret
.val
[1] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 1);
21095 ret
.val
[2] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 2);
21096 ret
.val
[3] = (uint16x4_t
) __builtin_aarch64_get_dregxiv4hi (__o
, 3);
21100 __extension__
static __inline uint32x2x4_t
__attribute__ ((__always_inline__
))
21101 vld4_u32 (const uint32_t * __a
)
21104 __builtin_aarch64_simd_xi __o
;
21105 __o
= __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si
*) __a
);
21106 ret
.val
[0] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 0);
21107 ret
.val
[1] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 1);
21108 ret
.val
[2] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 2);
21109 ret
.val
[3] = (uint32x2_t
) __builtin_aarch64_get_dregxiv2si (__o
, 3);
21113 __extension__
static __inline float32x2x4_t
__attribute__ ((__always_inline__
))
21114 vld4_f32 (const float32_t
* __a
)
21117 __builtin_aarch64_simd_xi __o
;
21118 __o
= __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf
*) __a
);
21119 ret
.val
[0] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 0);
21120 ret
.val
[1] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 1);
21121 ret
.val
[2] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 2);
21122 ret
.val
[3] = (float32x2_t
) __builtin_aarch64_get_dregxiv2sf (__o
, 3);
21126 __extension__
static __inline int8x16x4_t
__attribute__ ((__always_inline__
))
21127 vld4q_s8 (const int8_t * __a
)
21130 __builtin_aarch64_simd_xi __o
;
21131 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
21132 ret
.val
[0] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
21133 ret
.val
[1] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
21134 ret
.val
[2] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
21135 ret
.val
[3] = (int8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
21139 __extension__
static __inline poly8x16x4_t
__attribute__ ((__always_inline__
))
21140 vld4q_p8 (const poly8_t
* __a
)
21143 __builtin_aarch64_simd_xi __o
;
21144 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
21145 ret
.val
[0] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
21146 ret
.val
[1] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
21147 ret
.val
[2] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
21148 ret
.val
[3] = (poly8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
21152 __extension__
static __inline int16x8x4_t
__attribute__ ((__always_inline__
))
21153 vld4q_s16 (const int16_t * __a
)
21156 __builtin_aarch64_simd_xi __o
;
21157 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
21158 ret
.val
[0] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
21159 ret
.val
[1] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
21160 ret
.val
[2] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
21161 ret
.val
[3] = (int16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
21165 __extension__
static __inline poly16x8x4_t
__attribute__ ((__always_inline__
))
21166 vld4q_p16 (const poly16_t
* __a
)
21169 __builtin_aarch64_simd_xi __o
;
21170 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
21171 ret
.val
[0] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
21172 ret
.val
[1] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
21173 ret
.val
[2] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
21174 ret
.val
[3] = (poly16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
21178 __extension__
static __inline int32x4x4_t
__attribute__ ((__always_inline__
))
21179 vld4q_s32 (const int32_t * __a
)
21182 __builtin_aarch64_simd_xi __o
;
21183 __o
= __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si
*) __a
);
21184 ret
.val
[0] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 0);
21185 ret
.val
[1] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 1);
21186 ret
.val
[2] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 2);
21187 ret
.val
[3] = (int32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 3);
21191 __extension__
static __inline int64x2x4_t
__attribute__ ((__always_inline__
))
21192 vld4q_s64 (const int64_t * __a
)
21195 __builtin_aarch64_simd_xi __o
;
21196 __o
= __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di
*) __a
);
21197 ret
.val
[0] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 0);
21198 ret
.val
[1] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 1);
21199 ret
.val
[2] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 2);
21200 ret
.val
[3] = (int64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 3);
21204 __extension__
static __inline uint8x16x4_t
__attribute__ ((__always_inline__
))
21205 vld4q_u8 (const uint8_t * __a
)
21208 __builtin_aarch64_simd_xi __o
;
21209 __o
= __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi
*) __a
);
21210 ret
.val
[0] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 0);
21211 ret
.val
[1] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 1);
21212 ret
.val
[2] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 2);
21213 ret
.val
[3] = (uint8x16_t
) __builtin_aarch64_get_qregxiv16qi (__o
, 3);
21217 __extension__
static __inline uint16x8x4_t
__attribute__ ((__always_inline__
))
21218 vld4q_u16 (const uint16_t * __a
)
21221 __builtin_aarch64_simd_xi __o
;
21222 __o
= __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi
*) __a
);
21223 ret
.val
[0] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 0);
21224 ret
.val
[1] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 1);
21225 ret
.val
[2] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 2);
21226 ret
.val
[3] = (uint16x8_t
) __builtin_aarch64_get_qregxiv8hi (__o
, 3);
21230 __extension__
static __inline uint32x4x4_t
__attribute__ ((__always_inline__
))
21231 vld4q_u32 (const uint32_t * __a
)
21234 __builtin_aarch64_simd_xi __o
;
21235 __o
= __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si
*) __a
);
21236 ret
.val
[0] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 0);
21237 ret
.val
[1] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 1);
21238 ret
.val
[2] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 2);
21239 ret
.val
[3] = (uint32x4_t
) __builtin_aarch64_get_qregxiv4si (__o
, 3);
21243 __extension__
static __inline uint64x2x4_t
__attribute__ ((__always_inline__
))
21244 vld4q_u64 (const uint64_t * __a
)
21247 __builtin_aarch64_simd_xi __o
;
21248 __o
= __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di
*) __a
);
21249 ret
.val
[0] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 0);
21250 ret
.val
[1] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 1);
21251 ret
.val
[2] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 2);
21252 ret
.val
[3] = (uint64x2_t
) __builtin_aarch64_get_qregxiv2di (__o
, 3);
21256 __extension__
static __inline float32x4x4_t
__attribute__ ((__always_inline__
))
21257 vld4q_f32 (const float32_t
* __a
)
21260 __builtin_aarch64_simd_xi __o
;
21261 __o
= __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf
*) __a
);
21262 ret
.val
[0] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 0);
21263 ret
.val
[1] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 1);
21264 ret
.val
[2] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 2);
21265 ret
.val
[3] = (float32x4_t
) __builtin_aarch64_get_qregxiv4sf (__o
, 3);
21269 __extension__
static __inline float64x2x4_t
__attribute__ ((__always_inline__
))
21270 vld4q_f64 (const float64_t
* __a
)
21273 __builtin_aarch64_simd_xi __o
;
21274 __o
= __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df
*) __a
);
21275 ret
.val
[0] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 0);
21276 ret
.val
[1] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 1);
21277 ret
.val
[2] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 2);
21278 ret
.val
[3] = (float64x2_t
) __builtin_aarch64_get_qregxiv2df (__o
, 3);
21284 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21285 vmax_f32 (float32x2_t __a
, float32x2_t __b
)
21287 return __builtin_aarch64_fmaxv2sf (__a
, __b
);
21290 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
21291 vmax_s8 (int8x8_t __a
, int8x8_t __b
)
21293 return __builtin_aarch64_smaxv8qi (__a
, __b
);
21296 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21297 vmax_s16 (int16x4_t __a
, int16x4_t __b
)
21299 return __builtin_aarch64_smaxv4hi (__a
, __b
);
21302 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21303 vmax_s32 (int32x2_t __a
, int32x2_t __b
)
21305 return __builtin_aarch64_smaxv2si (__a
, __b
);
21308 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
21309 vmax_u8 (uint8x8_t __a
, uint8x8_t __b
)
21311 return (uint8x8_t
) __builtin_aarch64_umaxv8qi ((int8x8_t
) __a
,
21315 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
21316 vmax_u16 (uint16x4_t __a
, uint16x4_t __b
)
21318 return (uint16x4_t
) __builtin_aarch64_umaxv4hi ((int16x4_t
) __a
,
21322 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
21323 vmax_u32 (uint32x2_t __a
, uint32x2_t __b
)
21325 return (uint32x2_t
) __builtin_aarch64_umaxv2si ((int32x2_t
) __a
,
21329 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21330 vmaxq_f32 (float32x4_t __a
, float32x4_t __b
)
21332 return __builtin_aarch64_fmaxv4sf (__a
, __b
);
21335 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21336 vmaxq_f64 (float64x2_t __a
, float64x2_t __b
)
21338 return __builtin_aarch64_fmaxv2df (__a
, __b
);
21341 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
21342 vmaxq_s8 (int8x16_t __a
, int8x16_t __b
)
21344 return __builtin_aarch64_smaxv16qi (__a
, __b
);
21347 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
21348 vmaxq_s16 (int16x8_t __a
, int16x8_t __b
)
21350 return __builtin_aarch64_smaxv8hi (__a
, __b
);
21353 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21354 vmaxq_s32 (int32x4_t __a
, int32x4_t __b
)
21356 return __builtin_aarch64_smaxv4si (__a
, __b
);
21359 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
21360 vmaxq_u8 (uint8x16_t __a
, uint8x16_t __b
)
21362 return (uint8x16_t
) __builtin_aarch64_umaxv16qi ((int8x16_t
) __a
,
21366 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
21367 vmaxq_u16 (uint16x8_t __a
, uint16x8_t __b
)
21369 return (uint16x8_t
) __builtin_aarch64_umaxv8hi ((int16x8_t
) __a
,
21373 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
21374 vmaxq_u32 (uint32x4_t __a
, uint32x4_t __b
)
21376 return (uint32x4_t
) __builtin_aarch64_umaxv4si ((int32x4_t
) __a
,
21382 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21383 vmin_f32 (float32x2_t __a
, float32x2_t __b
)
21385 return __builtin_aarch64_fminv2sf (__a
, __b
);
21388 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
21389 vmin_s8 (int8x8_t __a
, int8x8_t __b
)
21391 return __builtin_aarch64_sminv8qi (__a
, __b
);
21394 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21395 vmin_s16 (int16x4_t __a
, int16x4_t __b
)
21397 return __builtin_aarch64_sminv4hi (__a
, __b
);
21400 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21401 vmin_s32 (int32x2_t __a
, int32x2_t __b
)
21403 return __builtin_aarch64_sminv2si (__a
, __b
);
21406 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
21407 vmin_u8 (uint8x8_t __a
, uint8x8_t __b
)
21409 return (uint8x8_t
) __builtin_aarch64_uminv8qi ((int8x8_t
) __a
,
21413 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
21414 vmin_u16 (uint16x4_t __a
, uint16x4_t __b
)
21416 return (uint16x4_t
) __builtin_aarch64_uminv4hi ((int16x4_t
) __a
,
21420 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
21421 vmin_u32 (uint32x2_t __a
, uint32x2_t __b
)
21423 return (uint32x2_t
) __builtin_aarch64_uminv2si ((int32x2_t
) __a
,
21427 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21428 vminq_f32 (float32x4_t __a
, float32x4_t __b
)
21430 return __builtin_aarch64_fminv4sf (__a
, __b
);
21433 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21434 vminq_f64 (float64x2_t __a
, float64x2_t __b
)
21436 return __builtin_aarch64_fminv2df (__a
, __b
);
21439 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
21440 vminq_s8 (int8x16_t __a
, int8x16_t __b
)
21442 return __builtin_aarch64_sminv16qi (__a
, __b
);
21445 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
21446 vminq_s16 (int16x8_t __a
, int16x8_t __b
)
21448 return __builtin_aarch64_sminv8hi (__a
, __b
);
21451 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21452 vminq_s32 (int32x4_t __a
, int32x4_t __b
)
21454 return __builtin_aarch64_sminv4si (__a
, __b
);
21457 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
21458 vminq_u8 (uint8x16_t __a
, uint8x16_t __b
)
21460 return (uint8x16_t
) __builtin_aarch64_uminv16qi ((int8x16_t
) __a
,
21464 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
21465 vminq_u16 (uint16x8_t __a
, uint16x8_t __b
)
21467 return (uint16x8_t
) __builtin_aarch64_uminv8hi ((int16x8_t
) __a
,
21471 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
21472 vminq_u32 (uint32x4_t __a
, uint32x4_t __b
)
21474 return (uint32x4_t
) __builtin_aarch64_uminv4si ((int32x4_t
) __a
,
/* vmla_f32: always-inline function of three float32x2_t operands (a, b, c)
   returning float32x2_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21480 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21481 vmla_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
/* vmlaq_f32: always-inline function of three float32x4_t operands (a, b, c)
   returning float32x4_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21486 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21487 vmlaq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
/* vmlaq_f64: always-inline function of three float64x2_t operands (a, b, c)
   returning float64x2_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21492 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21493 vmlaq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
/* vmls_f32: always-inline function of three float32x2_t operands (a, b, c)
   returning float32x2_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21498 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
21499 vmls_f32 (float32x2_t a
, float32x2_t b
, float32x2_t c
)
/* vmlsq_f32: always-inline function of three float32x4_t operands (a, b, c)
   returning float32x4_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21504 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
21505 vmlsq_f32 (float32x4_t a
, float32x4_t b
, float32x4_t c
)
/* vmlsq_f64: always-inline function of three float64x2_t operands (a, b, c)
   returning float64x2_t.
   NOTE(review): only the signature survives in this extract; the function
   body is not visible and must be restored from the upstream header.  */
21510 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
21511 vmlsq_f64 (float64x2_t a
, float64x2_t b
, float64x2_t c
)
21518 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21519 vqabsq_s64 (int64x2_t __a
)
21521 return (int64x2_t
) __builtin_aarch64_sqabsv2di (__a
);
21524 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21525 vqabsb_s8 (int8x1_t __a
)
21527 return (int8x1_t
) __builtin_aarch64_sqabsqi (__a
);
21530 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21531 vqabsh_s16 (int16x1_t __a
)
21533 return (int16x1_t
) __builtin_aarch64_sqabshi (__a
);
21536 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21537 vqabss_s32 (int32x1_t __a
)
21539 return (int32x1_t
) __builtin_aarch64_sqabssi (__a
);
21544 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
21545 vqaddb_s8 (int8x1_t __a
, int8x1_t __b
)
21547 return (int8x1_t
) __builtin_aarch64_sqaddqi (__a
, __b
);
21550 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21551 vqaddh_s16 (int16x1_t __a
, int16x1_t __b
)
21553 return (int16x1_t
) __builtin_aarch64_sqaddhi (__a
, __b
);
21556 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21557 vqadds_s32 (int32x1_t __a
, int32x1_t __b
)
21559 return (int32x1_t
) __builtin_aarch64_sqaddsi (__a
, __b
);
21562 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21563 vqaddd_s64 (int64x1_t __a
, int64x1_t __b
)
21565 return (int64x1_t
) __builtin_aarch64_sqadddi (__a
, __b
);
21568 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
21569 vqaddb_u8 (uint8x1_t __a
, uint8x1_t __b
)
21571 return (uint8x1_t
) __builtin_aarch64_uqaddqi (__a
, __b
);
21574 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
21575 vqaddh_u16 (uint16x1_t __a
, uint16x1_t __b
)
21577 return (uint16x1_t
) __builtin_aarch64_uqaddhi (__a
, __b
);
21580 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
21581 vqadds_u32 (uint32x1_t __a
, uint32x1_t __b
)
21583 return (uint32x1_t
) __builtin_aarch64_uqaddsi (__a
, __b
);
21586 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
21587 vqaddd_u64 (uint64x1_t __a
, uint64x1_t __b
)
21589 return (uint64x1_t
) __builtin_aarch64_uqadddi (__a
, __b
);
21594 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21595 vqdmlal_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
)
21597 return __builtin_aarch64_sqdmlalv4hi (__a
, __b
, __c
);
21600 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21601 vqdmlal_high_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
)
21603 return __builtin_aarch64_sqdmlal2v8hi (__a
, __b
, __c
);
/* vqdmlal_high_lane_s16: returns int32x4_t from
   __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21606 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21607 vqdmlal_high_lane_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21610 return __builtin_aarch64_sqdmlal2_lanev8hi (__a
, __b
, __c
, __d
);
/* vqdmlal_high_laneq_s16: returns int32x4_t from
   __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21613 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21614 vqdmlal_high_laneq_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21617 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a
, __b
, __c
, __d
);
21620 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21621 vqdmlal_high_n_s16 (int32x4_t __a
, int16x8_t __b
, int16_t __c
)
21623 return __builtin_aarch64_sqdmlal2_nv8hi (__a
, __b
, __c
);
21626 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21627 vqdmlal_lane_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
, int const __d
)
21629 int16x8_t __tmp
= vcombine_s16 (__c
, vcreate_s16 (INT64_C (0)));
21630 return __builtin_aarch64_sqdmlal_lanev4hi (__a
, __b
, __tmp
, __d
);
21633 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21634 vqdmlal_laneq_s16 (int32x4_t __a
, int16x4_t __b
, int16x8_t __c
, int const __d
)
21636 return __builtin_aarch64_sqdmlal_laneqv4hi (__a
, __b
, __c
, __d
);
21639 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21640 vqdmlal_n_s16 (int32x4_t __a
, int16x4_t __b
, int16_t __c
)
21642 return __builtin_aarch64_sqdmlal_nv4hi (__a
, __b
, __c
);
21645 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21646 vqdmlal_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
)
21648 return __builtin_aarch64_sqdmlalv2si (__a
, __b
, __c
);
21651 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21652 vqdmlal_high_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
)
21654 return __builtin_aarch64_sqdmlal2v4si (__a
, __b
, __c
);
/* vqdmlal_high_lane_s32: returns int64x2_t from
   __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21657 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21658 vqdmlal_high_lane_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21661 return __builtin_aarch64_sqdmlal2_lanev4si (__a
, __b
, __c
, __d
);
/* vqdmlal_high_laneq_s32: returns int64x2_t from
   __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21664 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21665 vqdmlal_high_laneq_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21668 return __builtin_aarch64_sqdmlal2_laneqv4si (__a
, __b
, __c
, __d
);
21671 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21672 vqdmlal_high_n_s32 (int64x2_t __a
, int32x4_t __b
, int32_t __c
)
21674 return __builtin_aarch64_sqdmlal2_nv4si (__a
, __b
, __c
);
21677 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21678 vqdmlal_lane_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
, int const __d
)
21680 int32x4_t __tmp
= vcombine_s32 (__c
, vcreate_s32 (INT64_C (0)));
21681 return __builtin_aarch64_sqdmlal_lanev2si (__a
, __b
, __tmp
, __d
);
21684 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21685 vqdmlal_laneq_s32 (int64x2_t __a
, int32x2_t __b
, int32x4_t __c
, int const __d
)
21687 return __builtin_aarch64_sqdmlal_laneqv2si (__a
, __b
, __c
, __d
);
21690 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21691 vqdmlal_n_s32 (int64x2_t __a
, int32x2_t __b
, int32_t __c
)
21693 return __builtin_aarch64_sqdmlal_nv2si (__a
, __b
, __c
);
21696 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21697 vqdmlalh_s16 (int32x1_t __a
, int16x1_t __b
, int16x1_t __c
)
21699 return __builtin_aarch64_sqdmlalhi (__a
, __b
, __c
);
21702 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21703 vqdmlalh_lane_s16 (int32x1_t __a
, int16x1_t __b
, int16x8_t __c
, const int __d
)
21705 return __builtin_aarch64_sqdmlal_lanehi (__a
, __b
, __c
, __d
);
21708 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21709 vqdmlals_s32 (int64x1_t __a
, int32x1_t __b
, int32x1_t __c
)
21711 return __builtin_aarch64_sqdmlalsi (__a
, __b
, __c
);
21714 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21715 vqdmlals_lane_s32 (int64x1_t __a
, int32x1_t __b
, int32x4_t __c
, const int __d
)
21717 return __builtin_aarch64_sqdmlal_lanesi (__a
, __b
, __c
, __d
);
21722 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21723 vqdmlsl_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
)
21725 return __builtin_aarch64_sqdmlslv4hi (__a
, __b
, __c
);
21728 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21729 vqdmlsl_high_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
)
21731 return __builtin_aarch64_sqdmlsl2v8hi (__a
, __b
, __c
);
/* vqdmlsl_high_lane_s16: returns int32x4_t from
   __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21734 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21735 vqdmlsl_high_lane_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21738 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a
, __b
, __c
, __d
);
/* vqdmlsl_high_laneq_s16: returns int32x4_t from
   __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21741 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21742 vqdmlsl_high_laneq_s16 (int32x4_t __a
, int16x8_t __b
, int16x8_t __c
,
21745 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a
, __b
, __c
, __d
);
21748 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21749 vqdmlsl_high_n_s16 (int32x4_t __a
, int16x8_t __b
, int16_t __c
)
21751 return __builtin_aarch64_sqdmlsl2_nv8hi (__a
, __b
, __c
);
21754 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21755 vqdmlsl_lane_s16 (int32x4_t __a
, int16x4_t __b
, int16x4_t __c
, int const __d
)
21757 int16x8_t __tmp
= vcombine_s16 (__c
, vcreate_s16 (INT64_C (0)));
21758 return __builtin_aarch64_sqdmlsl_lanev4hi (__a
, __b
, __tmp
, __d
);
21761 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21762 vqdmlsl_laneq_s16 (int32x4_t __a
, int16x4_t __b
, int16x8_t __c
, int const __d
)
21764 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a
, __b
, __c
, __d
);
21767 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21768 vqdmlsl_n_s16 (int32x4_t __a
, int16x4_t __b
, int16_t __c
)
21770 return __builtin_aarch64_sqdmlsl_nv4hi (__a
, __b
, __c
);
21773 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21774 vqdmlsl_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
)
21776 return __builtin_aarch64_sqdmlslv2si (__a
, __b
, __c
);
21779 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21780 vqdmlsl_high_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
)
21782 return __builtin_aarch64_sqdmlsl2v4si (__a
, __b
, __c
);
/* vqdmlsl_high_lane_s32: returns int64x2_t from
   __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21785 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21786 vqdmlsl_high_lane_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21789 return __builtin_aarch64_sqdmlsl2_lanev4si (__a
, __b
, __c
, __d
);
/* vqdmlsl_high_laneq_s32: returns int64x2_t from
   __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d).
   NOTE(review): the parameter list is cut off after `__c ,` in this
   extract, so the declaration of __d is not visible; restore it from the
   upstream header.  */
21792 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21793 vqdmlsl_high_laneq_s32 (int64x2_t __a
, int32x4_t __b
, int32x4_t __c
,
21796 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a
, __b
, __c
, __d
);
21799 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21800 vqdmlsl_high_n_s32 (int64x2_t __a
, int32x4_t __b
, int32_t __c
)
21802 return __builtin_aarch64_sqdmlsl2_nv4si (__a
, __b
, __c
);
21805 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21806 vqdmlsl_lane_s32 (int64x2_t __a
, int32x2_t __b
, int32x2_t __c
, int const __d
)
21808 int32x4_t __tmp
= vcombine_s32 (__c
, vcreate_s32 (INT64_C (0)));
21809 return __builtin_aarch64_sqdmlsl_lanev2si (__a
, __b
, __tmp
, __d
);
21812 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21813 vqdmlsl_laneq_s32 (int64x2_t __a
, int32x2_t __b
, int32x4_t __c
, int const __d
)
21815 return __builtin_aarch64_sqdmlsl_laneqv2si (__a
, __b
, __c
, __d
);
21818 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21819 vqdmlsl_n_s32 (int64x2_t __a
, int32x2_t __b
, int32_t __c
)
21821 return __builtin_aarch64_sqdmlsl_nv2si (__a
, __b
, __c
);
21824 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21825 vqdmlslh_s16 (int32x1_t __a
, int16x1_t __b
, int16x1_t __c
)
21827 return __builtin_aarch64_sqdmlslhi (__a
, __b
, __c
);
21830 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21831 vqdmlslh_lane_s16 (int32x1_t __a
, int16x1_t __b
, int16x8_t __c
, const int __d
)
21833 return __builtin_aarch64_sqdmlsl_lanehi (__a
, __b
, __c
, __d
);
21836 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21837 vqdmlsls_s32 (int64x1_t __a
, int32x1_t __b
, int32x1_t __c
)
21839 return __builtin_aarch64_sqdmlslsi (__a
, __b
, __c
);
21842 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
21843 vqdmlsls_lane_s32 (int64x1_t __a
, int32x1_t __b
, int32x4_t __c
, const int __d
)
21845 return __builtin_aarch64_sqdmlsl_lanesi (__a
, __b
, __c
, __d
);
21850 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
21851 vqdmulh_lane_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
21853 return __builtin_aarch64_sqdmulh_lanev4hi (__a
, __b
, __c
);
21856 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
21857 vqdmulh_lane_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
21859 return __builtin_aarch64_sqdmulh_lanev2si (__a
, __b
, __c
);
21862 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
21863 vqdmulhq_lane_s16 (int16x8_t __a
, int16x4_t __b
, const int __c
)
21865 return __builtin_aarch64_sqdmulh_lanev8hi (__a
, __b
, __c
);
21868 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21869 vqdmulhq_lane_s32 (int32x4_t __a
, int32x2_t __b
, const int __c
)
21871 return __builtin_aarch64_sqdmulh_lanev4si (__a
, __b
, __c
);
21874 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21875 vqdmulhh_s16 (int16x1_t __a
, int16x1_t __b
)
21877 return (int16x1_t
) __builtin_aarch64_sqdmulhhi (__a
, __b
);
21880 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
21881 vqdmulhh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
21883 return __builtin_aarch64_sqdmulh_lanehi (__a
, __b
, __c
);
21886 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21887 vqdmulhs_s32 (int32x1_t __a
, int32x1_t __b
)
21889 return (int32x1_t
) __builtin_aarch64_sqdmulhsi (__a
, __b
);
21892 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21893 vqdmulhs_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
21895 return __builtin_aarch64_sqdmulh_lanesi (__a
, __b
, __c
);
21900 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21901 vqdmull_s16 (int16x4_t __a
, int16x4_t __b
)
21903 return __builtin_aarch64_sqdmullv4hi (__a
, __b
);
21906 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21907 vqdmull_high_s16 (int16x8_t __a
, int16x8_t __b
)
21909 return __builtin_aarch64_sqdmull2v8hi (__a
, __b
);
21912 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21913 vqdmull_high_lane_s16 (int16x8_t __a
, int16x8_t __b
, int const __c
)
21915 return __builtin_aarch64_sqdmull2_lanev8hi (__a
, __b
,__c
);
21918 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21919 vqdmull_high_laneq_s16 (int16x8_t __a
, int16x8_t __b
, int const __c
)
21921 return __builtin_aarch64_sqdmull2_laneqv8hi (__a
, __b
,__c
);
21924 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21925 vqdmull_high_n_s16 (int16x8_t __a
, int16_t __b
)
21927 return __builtin_aarch64_sqdmull2_nv8hi (__a
, __b
);
21930 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21931 vqdmull_lane_s16 (int16x4_t __a
, int16x4_t __b
, int const __c
)
21933 int16x8_t __tmp
= vcombine_s16 (__b
, vcreate_s16 (INT64_C (0)));
21934 return __builtin_aarch64_sqdmull_lanev4hi (__a
, __tmp
, __c
);
21937 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21938 vqdmull_laneq_s16 (int16x4_t __a
, int16x8_t __b
, int const __c
)
21940 return __builtin_aarch64_sqdmull_laneqv4hi (__a
, __b
, __c
);
21943 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
21944 vqdmull_n_s16 (int16x4_t __a
, int16_t __b
)
21946 return __builtin_aarch64_sqdmull_nv4hi (__a
, __b
);
21949 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21950 vqdmull_s32 (int32x2_t __a
, int32x2_t __b
)
21952 return __builtin_aarch64_sqdmullv2si (__a
, __b
);
21955 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21956 vqdmull_high_s32 (int32x4_t __a
, int32x4_t __b
)
21958 return __builtin_aarch64_sqdmull2v4si (__a
, __b
);
21961 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21962 vqdmull_high_lane_s32 (int32x4_t __a
, int32x4_t __b
, int const __c
)
21964 return __builtin_aarch64_sqdmull2_lanev4si (__a
, __b
, __c
);
21967 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21968 vqdmull_high_laneq_s32 (int32x4_t __a
, int32x4_t __b
, int const __c
)
21970 return __builtin_aarch64_sqdmull2_laneqv4si (__a
, __b
, __c
);
21973 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21974 vqdmull_high_n_s32 (int32x4_t __a
, int32_t __b
)
21976 return __builtin_aarch64_sqdmull2_nv4si (__a
, __b
);
21979 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21980 vqdmull_lane_s32 (int32x2_t __a
, int32x2_t __b
, int const __c
)
21982 int32x4_t __tmp
= vcombine_s32 (__b
, vcreate_s32 (INT64_C (0)));
21983 return __builtin_aarch64_sqdmull_lanev2si (__a
, __tmp
, __c
);
21986 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21987 vqdmull_laneq_s32 (int32x2_t __a
, int32x4_t __b
, int const __c
)
21989 return __builtin_aarch64_sqdmull_laneqv2si (__a
, __b
, __c
);
21992 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
21993 vqdmull_n_s32 (int32x2_t __a
, int32_t __b
)
21995 return __builtin_aarch64_sqdmull_nv2si (__a
, __b
);
21998 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
21999 vqdmullh_s16 (int16x1_t __a
, int16x1_t __b
)
22001 return (int32x1_t
) __builtin_aarch64_sqdmullhi (__a
, __b
);
22004 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22005 vqdmullh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
22007 return __builtin_aarch64_sqdmull_lanehi (__a
, __b
, __c
);
22010 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22011 vqdmulls_s32 (int32x1_t __a
, int32x1_t __b
)
22013 return (int64x1_t
) __builtin_aarch64_sqdmullsi (__a
, __b
);
22016 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22017 vqdmulls_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
22019 return __builtin_aarch64_sqdmull_lanesi (__a
, __b
, __c
);
22024 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22025 vqmovn_s16 (int16x8_t __a
)
22027 return (int8x8_t
) __builtin_aarch64_sqmovnv8hi (__a
);
22030 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22031 vqmovn_s32 (int32x4_t __a
)
22033 return (int16x4_t
) __builtin_aarch64_sqmovnv4si (__a
);
22036 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22037 vqmovn_s64 (int64x2_t __a
)
22039 return (int32x2_t
) __builtin_aarch64_sqmovnv2di (__a
);
22042 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22043 vqmovn_u16 (uint16x8_t __a
)
22045 return (uint8x8_t
) __builtin_aarch64_uqmovnv8hi ((int16x8_t
) __a
);
22048 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22049 vqmovn_u32 (uint32x4_t __a
)
22051 return (uint16x4_t
) __builtin_aarch64_uqmovnv4si ((int32x4_t
) __a
);
22054 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22055 vqmovn_u64 (uint64x2_t __a
)
22057 return (uint32x2_t
) __builtin_aarch64_uqmovnv2di ((int64x2_t
) __a
);
22060 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22061 vqmovnh_s16 (int16x1_t __a
)
22063 return (int8x1_t
) __builtin_aarch64_sqmovnhi (__a
);
22066 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22067 vqmovns_s32 (int32x1_t __a
)
22069 return (int16x1_t
) __builtin_aarch64_sqmovnsi (__a
);
22072 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22073 vqmovnd_s64 (int64x1_t __a
)
22075 return (int32x1_t
) __builtin_aarch64_sqmovndi (__a
);
22078 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22079 vqmovnh_u16 (uint16x1_t __a
)
22081 return (uint8x1_t
) __builtin_aarch64_uqmovnhi (__a
);
22084 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22085 vqmovns_u32 (uint32x1_t __a
)
22087 return (uint16x1_t
) __builtin_aarch64_uqmovnsi (__a
);
22090 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22091 vqmovnd_u64 (uint64x1_t __a
)
22093 return (uint32x1_t
) __builtin_aarch64_uqmovndi (__a
);
22098 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22099 vqmovun_s16 (int16x8_t __a
)
22101 return (uint8x8_t
) __builtin_aarch64_sqmovunv8hi (__a
);
22104 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22105 vqmovun_s32 (int32x4_t __a
)
22107 return (uint16x4_t
) __builtin_aarch64_sqmovunv4si (__a
);
22110 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22111 vqmovun_s64 (int64x2_t __a
)
22113 return (uint32x2_t
) __builtin_aarch64_sqmovunv2di (__a
);
22116 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22117 vqmovunh_s16 (int16x1_t __a
)
22119 return (int8x1_t
) __builtin_aarch64_sqmovunhi (__a
);
22122 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22123 vqmovuns_s32 (int32x1_t __a
)
22125 return (int16x1_t
) __builtin_aarch64_sqmovunsi (__a
);
22128 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22129 vqmovund_s64 (int64x1_t __a
)
22131 return (int32x1_t
) __builtin_aarch64_sqmovundi (__a
);
22136 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22137 vqnegq_s64 (int64x2_t __a
)
22139 return (int64x2_t
) __builtin_aarch64_sqnegv2di (__a
);
22142 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22143 vqnegb_s8 (int8x1_t __a
)
22145 return (int8x1_t
) __builtin_aarch64_sqnegqi (__a
);
22148 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22149 vqnegh_s16 (int16x1_t __a
)
22151 return (int16x1_t
) __builtin_aarch64_sqneghi (__a
);
22154 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22155 vqnegs_s32 (int32x1_t __a
)
22157 return (int32x1_t
) __builtin_aarch64_sqnegsi (__a
);
22162 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22163 vqrdmulh_lane_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
22165 return __builtin_aarch64_sqrdmulh_lanev4hi (__a
, __b
, __c
);
22168 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22169 vqrdmulh_lane_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
22171 return __builtin_aarch64_sqrdmulh_lanev2si (__a
, __b
, __c
);
22174 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22175 vqrdmulhq_lane_s16 (int16x8_t __a
, int16x4_t __b
, const int __c
)
22177 return __builtin_aarch64_sqrdmulh_lanev8hi (__a
, __b
, __c
);
22180 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22181 vqrdmulhq_lane_s32 (int32x4_t __a
, int32x2_t __b
, const int __c
)
22183 return __builtin_aarch64_sqrdmulh_lanev4si (__a
, __b
, __c
);
22186 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22187 vqrdmulhh_s16 (int16x1_t __a
, int16x1_t __b
)
22189 return (int16x1_t
) __builtin_aarch64_sqrdmulhhi (__a
, __b
);
22192 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22193 vqrdmulhh_lane_s16 (int16x1_t __a
, int16x8_t __b
, const int __c
)
22195 return __builtin_aarch64_sqrdmulh_lanehi (__a
, __b
, __c
);
22198 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22199 vqrdmulhs_s32 (int32x1_t __a
, int32x1_t __b
)
22201 return (int32x1_t
) __builtin_aarch64_sqrdmulhsi (__a
, __b
);
22204 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22205 vqrdmulhs_lane_s32 (int32x1_t __a
, int32x4_t __b
, const int __c
)
22207 return __builtin_aarch64_sqrdmulh_lanesi (__a
, __b
, __c
);
22212 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22213 vqrshl_s8 (int8x8_t __a
, int8x8_t __b
)
22215 return __builtin_aarch64_sqrshlv8qi (__a
, __b
);
22218 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22219 vqrshl_s16 (int16x4_t __a
, int16x4_t __b
)
22221 return __builtin_aarch64_sqrshlv4hi (__a
, __b
);
22224 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22225 vqrshl_s32 (int32x2_t __a
, int32x2_t __b
)
22227 return __builtin_aarch64_sqrshlv2si (__a
, __b
);
22230 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22231 vqrshl_s64 (int64x1_t __a
, int64x1_t __b
)
22233 return __builtin_aarch64_sqrshldi (__a
, __b
);
22236 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22237 vqrshl_u8 (uint8x8_t __a
, int8x8_t __b
)
22239 return (uint8x8_t
) __builtin_aarch64_uqrshlv8qi ((int8x8_t
) __a
, __b
);
22242 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22243 vqrshl_u16 (uint16x4_t __a
, int16x4_t __b
)
22245 return (uint16x4_t
) __builtin_aarch64_uqrshlv4hi ((int16x4_t
) __a
, __b
);
22248 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22249 vqrshl_u32 (uint32x2_t __a
, int32x2_t __b
)
22251 return (uint32x2_t
) __builtin_aarch64_uqrshlv2si ((int32x2_t
) __a
, __b
);
22254 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22255 vqrshl_u64 (uint64x1_t __a
, int64x1_t __b
)
22257 return (uint64x1_t
) __builtin_aarch64_uqrshldi ((int64x1_t
) __a
, __b
);
22260 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22261 vqrshlq_s8 (int8x16_t __a
, int8x16_t __b
)
22263 return __builtin_aarch64_sqrshlv16qi (__a
, __b
);
22266 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22267 vqrshlq_s16 (int16x8_t __a
, int16x8_t __b
)
22269 return __builtin_aarch64_sqrshlv8hi (__a
, __b
);
22272 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22273 vqrshlq_s32 (int32x4_t __a
, int32x4_t __b
)
22275 return __builtin_aarch64_sqrshlv4si (__a
, __b
);
22278 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22279 vqrshlq_s64 (int64x2_t __a
, int64x2_t __b
)
22281 return __builtin_aarch64_sqrshlv2di (__a
, __b
);
22284 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22285 vqrshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
22287 return (uint8x16_t
) __builtin_aarch64_uqrshlv16qi ((int8x16_t
) __a
, __b
);
22290 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22291 vqrshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
22293 return (uint16x8_t
) __builtin_aarch64_uqrshlv8hi ((int16x8_t
) __a
, __b
);
22296 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22297 vqrshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
22299 return (uint32x4_t
) __builtin_aarch64_uqrshlv4si ((int32x4_t
) __a
, __b
);
22302 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22303 vqrshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
22305 return (uint64x2_t
) __builtin_aarch64_uqrshlv2di ((int64x2_t
) __a
, __b
);
22308 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22309 vqrshlb_s8 (int8x1_t __a
, int8x1_t __b
)
22311 return __builtin_aarch64_sqrshlqi (__a
, __b
);
22314 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22315 vqrshlh_s16 (int16x1_t __a
, int16x1_t __b
)
22317 return __builtin_aarch64_sqrshlhi (__a
, __b
);
22320 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22321 vqrshls_s32 (int32x1_t __a
, int32x1_t __b
)
22323 return __builtin_aarch64_sqrshlsi (__a
, __b
);
22326 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22327 vqrshld_s64 (int64x1_t __a
, int64x1_t __b
)
22329 return __builtin_aarch64_sqrshldi (__a
, __b
);
22332 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22333 vqrshlb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22335 return (uint8x1_t
) __builtin_aarch64_uqrshlqi (__a
, __b
);
22338 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22339 vqrshlh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22341 return (uint16x1_t
) __builtin_aarch64_uqrshlhi (__a
, __b
);
22344 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22345 vqrshls_u32 (uint32x1_t __a
, uint32x1_t __b
)
22347 return (uint32x1_t
) __builtin_aarch64_uqrshlsi (__a
, __b
);
22350 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22351 vqrshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
22353 return (uint64x1_t
) __builtin_aarch64_uqrshldi (__a
, __b
);
22358 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22359 vqrshrn_n_s16 (int16x8_t __a
, const int __b
)
22361 return (int8x8_t
) __builtin_aarch64_sqrshrn_nv8hi (__a
, __b
);
22364 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22365 vqrshrn_n_s32 (int32x4_t __a
, const int __b
)
22367 return (int16x4_t
) __builtin_aarch64_sqrshrn_nv4si (__a
, __b
);
22370 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22371 vqrshrn_n_s64 (int64x2_t __a
, const int __b
)
22373 return (int32x2_t
) __builtin_aarch64_sqrshrn_nv2di (__a
, __b
);
22376 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22377 vqrshrn_n_u16 (uint16x8_t __a
, const int __b
)
22379 return (uint8x8_t
) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t
) __a
, __b
);
22382 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22383 vqrshrn_n_u32 (uint32x4_t __a
, const int __b
)
22385 return (uint16x4_t
) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t
) __a
, __b
);
22388 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22389 vqrshrn_n_u64 (uint64x2_t __a
, const int __b
)
22391 return (uint32x2_t
) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t
) __a
, __b
);
22394 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22395 vqrshrnh_n_s16 (int16x1_t __a
, const int __b
)
22397 return (int8x1_t
) __builtin_aarch64_sqrshrn_nhi (__a
, __b
);
22400 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22401 vqrshrns_n_s32 (int32x1_t __a
, const int __b
)
22403 return (int16x1_t
) __builtin_aarch64_sqrshrn_nsi (__a
, __b
);
22406 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22407 vqrshrnd_n_s64 (int64x1_t __a
, const int __b
)
22409 return (int32x1_t
) __builtin_aarch64_sqrshrn_ndi (__a
, __b
);
22412 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22413 vqrshrnh_n_u16 (uint16x1_t __a
, const int __b
)
22415 return (uint8x1_t
) __builtin_aarch64_uqrshrn_nhi (__a
, __b
);
22418 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22419 vqrshrns_n_u32 (uint32x1_t __a
, const int __b
)
22421 return (uint16x1_t
) __builtin_aarch64_uqrshrn_nsi (__a
, __b
);
22424 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22425 vqrshrnd_n_u64 (uint64x1_t __a
, const int __b
)
22427 return (uint32x1_t
) __builtin_aarch64_uqrshrn_ndi (__a
, __b
);
22432 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22433 vqrshrun_n_s16 (int16x8_t __a
, const int __b
)
22435 return (uint8x8_t
) __builtin_aarch64_sqrshrun_nv8hi (__a
, __b
);
22438 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22439 vqrshrun_n_s32 (int32x4_t __a
, const int __b
)
22441 return (uint16x4_t
) __builtin_aarch64_sqrshrun_nv4si (__a
, __b
);
22444 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22445 vqrshrun_n_s64 (int64x2_t __a
, const int __b
)
22447 return (uint32x2_t
) __builtin_aarch64_sqrshrun_nv2di (__a
, __b
);
22450 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22451 vqrshrunh_n_s16 (int16x1_t __a
, const int __b
)
22453 return (int8x1_t
) __builtin_aarch64_sqrshrun_nhi (__a
, __b
);
22456 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22457 vqrshruns_n_s32 (int32x1_t __a
, const int __b
)
22459 return (int16x1_t
) __builtin_aarch64_sqrshrun_nsi (__a
, __b
);
22462 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22463 vqrshrund_n_s64 (int64x1_t __a
, const int __b
)
22465 return (int32x1_t
) __builtin_aarch64_sqrshrun_ndi (__a
, __b
);
22470 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22471 vqshl_s8 (int8x8_t __a
, int8x8_t __b
)
22473 return __builtin_aarch64_sqshlv8qi (__a
, __b
);
22476 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22477 vqshl_s16 (int16x4_t __a
, int16x4_t __b
)
22479 return __builtin_aarch64_sqshlv4hi (__a
, __b
);
22482 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22483 vqshl_s32 (int32x2_t __a
, int32x2_t __b
)
22485 return __builtin_aarch64_sqshlv2si (__a
, __b
);
22488 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22489 vqshl_s64 (int64x1_t __a
, int64x1_t __b
)
22491 return __builtin_aarch64_sqshldi (__a
, __b
);
22494 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22495 vqshl_u8 (uint8x8_t __a
, int8x8_t __b
)
22497 return (uint8x8_t
) __builtin_aarch64_uqshlv8qi ((int8x8_t
) __a
, __b
);
22500 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22501 vqshl_u16 (uint16x4_t __a
, int16x4_t __b
)
22503 return (uint16x4_t
) __builtin_aarch64_uqshlv4hi ((int16x4_t
) __a
, __b
);
22506 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22507 vqshl_u32 (uint32x2_t __a
, int32x2_t __b
)
22509 return (uint32x2_t
) __builtin_aarch64_uqshlv2si ((int32x2_t
) __a
, __b
);
22512 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22513 vqshl_u64 (uint64x1_t __a
, int64x1_t __b
)
22515 return (uint64x1_t
) __builtin_aarch64_uqshldi ((int64x1_t
) __a
, __b
);
22518 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22519 vqshlq_s8 (int8x16_t __a
, int8x16_t __b
)
22521 return __builtin_aarch64_sqshlv16qi (__a
, __b
);
22524 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22525 vqshlq_s16 (int16x8_t __a
, int16x8_t __b
)
22527 return __builtin_aarch64_sqshlv8hi (__a
, __b
);
22530 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22531 vqshlq_s32 (int32x4_t __a
, int32x4_t __b
)
22533 return __builtin_aarch64_sqshlv4si (__a
, __b
);
22536 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22537 vqshlq_s64 (int64x2_t __a
, int64x2_t __b
)
22539 return __builtin_aarch64_sqshlv2di (__a
, __b
);
22542 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22543 vqshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
22545 return (uint8x16_t
) __builtin_aarch64_uqshlv16qi ((int8x16_t
) __a
, __b
);
22548 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22549 vqshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
22551 return (uint16x8_t
) __builtin_aarch64_uqshlv8hi ((int16x8_t
) __a
, __b
);
22554 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22555 vqshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
22557 return (uint32x4_t
) __builtin_aarch64_uqshlv4si ((int32x4_t
) __a
, __b
);
22560 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22561 vqshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
22563 return (uint64x2_t
) __builtin_aarch64_uqshlv2di ((int64x2_t
) __a
, __b
);
22566 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22567 vqshlb_s8 (int8x1_t __a
, int8x1_t __b
)
22569 return __builtin_aarch64_sqshlqi (__a
, __b
);
22572 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22573 vqshlh_s16 (int16x1_t __a
, int16x1_t __b
)
22575 return __builtin_aarch64_sqshlhi (__a
, __b
);
22578 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22579 vqshls_s32 (int32x1_t __a
, int32x1_t __b
)
22581 return __builtin_aarch64_sqshlsi (__a
, __b
);
22584 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22585 vqshld_s64 (int64x1_t __a
, int64x1_t __b
)
22587 return __builtin_aarch64_sqshldi (__a
, __b
);
22590 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22591 vqshlb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22593 return (uint8x1_t
) __builtin_aarch64_uqshlqi (__a
, __b
);
22596 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22597 vqshlh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22599 return (uint16x1_t
) __builtin_aarch64_uqshlhi (__a
, __b
);
22602 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22603 vqshls_u32 (uint32x1_t __a
, uint32x1_t __b
)
22605 return (uint32x1_t
) __builtin_aarch64_uqshlsi (__a
, __b
);
22608 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22609 vqshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
22611 return (uint64x1_t
) __builtin_aarch64_uqshldi (__a
, __b
);
22614 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22615 vqshl_n_s8 (int8x8_t __a
, const int __b
)
22617 return (int8x8_t
) __builtin_aarch64_sqshl_nv8qi (__a
, __b
);
22620 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22621 vqshl_n_s16 (int16x4_t __a
, const int __b
)
22623 return (int16x4_t
) __builtin_aarch64_sqshl_nv4hi (__a
, __b
);
22626 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22627 vqshl_n_s32 (int32x2_t __a
, const int __b
)
22629 return (int32x2_t
) __builtin_aarch64_sqshl_nv2si (__a
, __b
);
22632 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22633 vqshl_n_s64 (int64x1_t __a
, const int __b
)
22635 return (int64x1_t
) __builtin_aarch64_sqshl_ndi (__a
, __b
);
22638 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22639 vqshl_n_u8 (uint8x8_t __a
, const int __b
)
22641 return (uint8x8_t
) __builtin_aarch64_uqshl_nv8qi ((int8x8_t
) __a
, __b
);
22644 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22645 vqshl_n_u16 (uint16x4_t __a
, const int __b
)
22647 return (uint16x4_t
) __builtin_aarch64_uqshl_nv4hi ((int16x4_t
) __a
, __b
);
22650 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22651 vqshl_n_u32 (uint32x2_t __a
, const int __b
)
22653 return (uint32x2_t
) __builtin_aarch64_uqshl_nv2si ((int32x2_t
) __a
, __b
);
22656 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22657 vqshl_n_u64 (uint64x1_t __a
, const int __b
)
22659 return (uint64x1_t
) __builtin_aarch64_uqshl_ndi ((int64x1_t
) __a
, __b
);
22662 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
22663 vqshlq_n_s8 (int8x16_t __a
, const int __b
)
22665 return (int8x16_t
) __builtin_aarch64_sqshl_nv16qi (__a
, __b
);
22668 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
22669 vqshlq_n_s16 (int16x8_t __a
, const int __b
)
22671 return (int16x8_t
) __builtin_aarch64_sqshl_nv8hi (__a
, __b
);
22674 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
22675 vqshlq_n_s32 (int32x4_t __a
, const int __b
)
22677 return (int32x4_t
) __builtin_aarch64_sqshl_nv4si (__a
, __b
);
22680 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
22681 vqshlq_n_s64 (int64x2_t __a
, const int __b
)
22683 return (int64x2_t
) __builtin_aarch64_sqshl_nv2di (__a
, __b
);
22686 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22687 vqshlq_n_u8 (uint8x16_t __a
, const int __b
)
22689 return (uint8x16_t
) __builtin_aarch64_uqshl_nv16qi ((int8x16_t
) __a
, __b
);
22692 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22693 vqshlq_n_u16 (uint16x8_t __a
, const int __b
)
22695 return (uint16x8_t
) __builtin_aarch64_uqshl_nv8hi ((int16x8_t
) __a
, __b
);
22698 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22699 vqshlq_n_u32 (uint32x4_t __a
, const int __b
)
22701 return (uint32x4_t
) __builtin_aarch64_uqshl_nv4si ((int32x4_t
) __a
, __b
);
22704 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22705 vqshlq_n_u64 (uint64x2_t __a
, const int __b
)
22707 return (uint64x2_t
) __builtin_aarch64_uqshl_nv2di ((int64x2_t
) __a
, __b
);
22710 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22711 vqshlb_n_s8 (int8x1_t __a
, const int __b
)
22713 return (int8x1_t
) __builtin_aarch64_sqshl_nqi (__a
, __b
);
22716 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22717 vqshlh_n_s16 (int16x1_t __a
, const int __b
)
22719 return (int16x1_t
) __builtin_aarch64_sqshl_nhi (__a
, __b
);
22722 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22723 vqshls_n_s32 (int32x1_t __a
, const int __b
)
22725 return (int32x1_t
) __builtin_aarch64_sqshl_nsi (__a
, __b
);
22728 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22729 vqshld_n_s64 (int64x1_t __a
, const int __b
)
22731 return (int64x1_t
) __builtin_aarch64_sqshl_ndi (__a
, __b
);
22734 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22735 vqshlb_n_u8 (uint8x1_t __a
, const int __b
)
22737 return (uint8x1_t
) __builtin_aarch64_uqshl_nqi (__a
, __b
);
22740 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22741 vqshlh_n_u16 (uint16x1_t __a
, const int __b
)
22743 return (uint16x1_t
) __builtin_aarch64_uqshl_nhi (__a
, __b
);
22746 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22747 vqshls_n_u32 (uint32x1_t __a
, const int __b
)
22749 return (uint32x1_t
) __builtin_aarch64_uqshl_nsi (__a
, __b
);
22752 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22753 vqshld_n_u64 (uint64x1_t __a
, const int __b
)
22755 return (uint64x1_t
) __builtin_aarch64_uqshl_ndi (__a
, __b
);
22760 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22761 vqshlu_n_s8 (int8x8_t __a
, const int __b
)
22763 return (uint8x8_t
) __builtin_aarch64_sqshlu_nv8qi (__a
, __b
);
22766 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22767 vqshlu_n_s16 (int16x4_t __a
, const int __b
)
22769 return (uint16x4_t
) __builtin_aarch64_sqshlu_nv4hi (__a
, __b
);
22772 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22773 vqshlu_n_s32 (int32x2_t __a
, const int __b
)
22775 return (uint32x2_t
) __builtin_aarch64_sqshlu_nv2si (__a
, __b
);
22778 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22779 vqshlu_n_s64 (int64x1_t __a
, const int __b
)
22781 return (uint64x1_t
) __builtin_aarch64_sqshlu_ndi (__a
, __b
);
22784 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
22785 vqshluq_n_s8 (int8x16_t __a
, const int __b
)
22787 return (uint8x16_t
) __builtin_aarch64_sqshlu_nv16qi (__a
, __b
);
22790 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
22791 vqshluq_n_s16 (int16x8_t __a
, const int __b
)
22793 return (uint16x8_t
) __builtin_aarch64_sqshlu_nv8hi (__a
, __b
);
22796 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
22797 vqshluq_n_s32 (int32x4_t __a
, const int __b
)
22799 return (uint32x4_t
) __builtin_aarch64_sqshlu_nv4si (__a
, __b
);
22802 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
22803 vqshluq_n_s64 (int64x2_t __a
, const int __b
)
22805 return (uint64x2_t
) __builtin_aarch64_sqshlu_nv2di (__a
, __b
);
22808 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22809 vqshlub_n_s8 (int8x1_t __a
, const int __b
)
22811 return (int8x1_t
) __builtin_aarch64_sqshlu_nqi (__a
, __b
);
22814 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22815 vqshluh_n_s16 (int16x1_t __a
, const int __b
)
22817 return (int16x1_t
) __builtin_aarch64_sqshlu_nhi (__a
, __b
);
22820 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22821 vqshlus_n_s32 (int32x1_t __a
, const int __b
)
22823 return (int32x1_t
) __builtin_aarch64_sqshlu_nsi (__a
, __b
);
22826 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22827 vqshlud_n_s64 (int64x1_t __a
, const int __b
)
22829 return (int64x1_t
) __builtin_aarch64_sqshlu_ndi (__a
, __b
);
22834 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
22835 vqshrn_n_s16 (int16x8_t __a
, const int __b
)
22837 return (int8x8_t
) __builtin_aarch64_sqshrn_nv8hi (__a
, __b
);
22840 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
22841 vqshrn_n_s32 (int32x4_t __a
, const int __b
)
22843 return (int16x4_t
) __builtin_aarch64_sqshrn_nv4si (__a
, __b
);
22846 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
22847 vqshrn_n_s64 (int64x2_t __a
, const int __b
)
22849 return (int32x2_t
) __builtin_aarch64_sqshrn_nv2di (__a
, __b
);
22852 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22853 vqshrn_n_u16 (uint16x8_t __a
, const int __b
)
22855 return (uint8x8_t
) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t
) __a
, __b
);
22858 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22859 vqshrn_n_u32 (uint32x4_t __a
, const int __b
)
22861 return (uint16x4_t
) __builtin_aarch64_uqshrn_nv4si ((int32x4_t
) __a
, __b
);
22864 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22865 vqshrn_n_u64 (uint64x2_t __a
, const int __b
)
22867 return (uint32x2_t
) __builtin_aarch64_uqshrn_nv2di ((int64x2_t
) __a
, __b
);
22870 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22871 vqshrnh_n_s16 (int16x1_t __a
, const int __b
)
22873 return (int8x1_t
) __builtin_aarch64_sqshrn_nhi (__a
, __b
);
22876 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22877 vqshrns_n_s32 (int32x1_t __a
, const int __b
)
22879 return (int16x1_t
) __builtin_aarch64_sqshrn_nsi (__a
, __b
);
22882 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22883 vqshrnd_n_s64 (int64x1_t __a
, const int __b
)
22885 return (int32x1_t
) __builtin_aarch64_sqshrn_ndi (__a
, __b
);
22888 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22889 vqshrnh_n_u16 (uint16x1_t __a
, const int __b
)
22891 return (uint8x1_t
) __builtin_aarch64_uqshrn_nhi (__a
, __b
);
22894 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22895 vqshrns_n_u32 (uint32x1_t __a
, const int __b
)
22897 return (uint16x1_t
) __builtin_aarch64_uqshrn_nsi (__a
, __b
);
22900 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22901 vqshrnd_n_u64 (uint64x1_t __a
, const int __b
)
22903 return (uint32x1_t
) __builtin_aarch64_uqshrn_ndi (__a
, __b
);
22908 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
22909 vqshrun_n_s16 (int16x8_t __a
, const int __b
)
22911 return (uint8x8_t
) __builtin_aarch64_sqshrun_nv8hi (__a
, __b
);
22914 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
22915 vqshrun_n_s32 (int32x4_t __a
, const int __b
)
22917 return (uint16x4_t
) __builtin_aarch64_sqshrun_nv4si (__a
, __b
);
22920 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
22921 vqshrun_n_s64 (int64x2_t __a
, const int __b
)
22923 return (uint32x2_t
) __builtin_aarch64_sqshrun_nv2di (__a
, __b
);
22926 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22927 vqshrunh_n_s16 (int16x1_t __a
, const int __b
)
22929 return (int8x1_t
) __builtin_aarch64_sqshrun_nhi (__a
, __b
);
22932 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22933 vqshruns_n_s32 (int32x1_t __a
, const int __b
)
22935 return (int16x1_t
) __builtin_aarch64_sqshrun_nsi (__a
, __b
);
22938 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22939 vqshrund_n_s64 (int64x1_t __a
, const int __b
)
22941 return (int32x1_t
) __builtin_aarch64_sqshrun_ndi (__a
, __b
);
22946 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
22947 vqsubb_s8 (int8x1_t __a
, int8x1_t __b
)
22949 return (int8x1_t
) __builtin_aarch64_sqsubqi (__a
, __b
);
22952 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
22953 vqsubh_s16 (int16x1_t __a
, int16x1_t __b
)
22955 return (int16x1_t
) __builtin_aarch64_sqsubhi (__a
, __b
);
22958 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
22959 vqsubs_s32 (int32x1_t __a
, int32x1_t __b
)
22961 return (int32x1_t
) __builtin_aarch64_sqsubsi (__a
, __b
);
22964 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
22965 vqsubd_s64 (int64x1_t __a
, int64x1_t __b
)
22967 return (int64x1_t
) __builtin_aarch64_sqsubdi (__a
, __b
);
22970 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
22971 vqsubb_u8 (uint8x1_t __a
, uint8x1_t __b
)
22973 return (uint8x1_t
) __builtin_aarch64_uqsubqi (__a
, __b
);
22976 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
22977 vqsubh_u16 (uint16x1_t __a
, uint16x1_t __b
)
22979 return (uint16x1_t
) __builtin_aarch64_uqsubhi (__a
, __b
);
22982 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
22983 vqsubs_u32 (uint32x1_t __a
, uint32x1_t __b
)
22985 return (uint32x1_t
) __builtin_aarch64_uqsubsi (__a
, __b
);
22988 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
22989 vqsubd_u64 (uint64x1_t __a
, uint64x1_t __b
)
22991 return (uint64x1_t
) __builtin_aarch64_uqsubdi (__a
, __b
);
22996 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
22997 vrecpes_f32 (float32_t __a
)
22999 return __builtin_aarch64_frecpesf (__a
);
23002 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
23003 vrecped_f64 (float64_t __a
)
23005 return __builtin_aarch64_frecpedf (__a
);
23008 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23009 vrecpe_f32 (float32x2_t __a
)
23011 return __builtin_aarch64_frecpev2sf (__a
);
23014 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
23015 vrecpeq_f32 (float32x4_t __a
)
23017 return __builtin_aarch64_frecpev4sf (__a
);
23020 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
23021 vrecpeq_f64 (float64x2_t __a
)
23023 return __builtin_aarch64_frecpev2df (__a
);
23028 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
23029 vrecpss_f32 (float32_t __a
, float32_t __b
)
23031 return __builtin_aarch64_frecpssf (__a
, __b
);
23034 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
23035 vrecpsd_f64 (float64_t __a
, float64_t __b
)
23037 return __builtin_aarch64_frecpsdf (__a
, __b
);
23040 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
23041 vrecps_f32 (float32x2_t __a
, float32x2_t __b
)
23043 return __builtin_aarch64_frecpsv2sf (__a
, __b
);
23046 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
23047 vrecpsq_f32 (float32x4_t __a
, float32x4_t __b
)
23049 return __builtin_aarch64_frecpsv4sf (__a
, __b
);
23052 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
23053 vrecpsq_f64 (float64x2_t __a
, float64x2_t __b
)
23055 return __builtin_aarch64_frecpsv2df (__a
, __b
);
23060 __extension__
static __inline float32_t
__attribute__ ((__always_inline__
))
23061 vrecpxs_f32 (float32_t __a
)
23063 return __builtin_aarch64_frecpxsf (__a
);
23066 __extension__
static __inline float64_t
__attribute__ ((__always_inline__
))
23067 vrecpxd_f64 (float64_t __a
)
23069 return __builtin_aarch64_frecpxdf (__a
);
23074 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23075 vrshl_s8 (int8x8_t __a
, int8x8_t __b
)
23077 return (int8x8_t
) __builtin_aarch64_srshlv8qi (__a
, __b
);
23080 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23081 vrshl_s16 (int16x4_t __a
, int16x4_t __b
)
23083 return (int16x4_t
) __builtin_aarch64_srshlv4hi (__a
, __b
);
23086 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23087 vrshl_s32 (int32x2_t __a
, int32x2_t __b
)
23089 return (int32x2_t
) __builtin_aarch64_srshlv2si (__a
, __b
);
23092 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23093 vrshl_s64 (int64x1_t __a
, int64x1_t __b
)
23095 return (int64x1_t
) __builtin_aarch64_srshldi (__a
, __b
);
23098 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23099 vrshl_u8 (uint8x8_t __a
, int8x8_t __b
)
23101 return (uint8x8_t
) __builtin_aarch64_urshlv8qi ((int8x8_t
) __a
, __b
);
23104 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23105 vrshl_u16 (uint16x4_t __a
, int16x4_t __b
)
23107 return (uint16x4_t
) __builtin_aarch64_urshlv4hi ((int16x4_t
) __a
, __b
);
23110 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23111 vrshl_u32 (uint32x2_t __a
, int32x2_t __b
)
23113 return (uint32x2_t
) __builtin_aarch64_urshlv2si ((int32x2_t
) __a
, __b
);
23116 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23117 vrshl_u64 (uint64x1_t __a
, int64x1_t __b
)
23119 return (uint64x1_t
) __builtin_aarch64_urshldi ((int64x1_t
) __a
, __b
);
23122 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23123 vrshlq_s8 (int8x16_t __a
, int8x16_t __b
)
23125 return (int8x16_t
) __builtin_aarch64_srshlv16qi (__a
, __b
);
23128 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23129 vrshlq_s16 (int16x8_t __a
, int16x8_t __b
)
23131 return (int16x8_t
) __builtin_aarch64_srshlv8hi (__a
, __b
);
23134 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23135 vrshlq_s32 (int32x4_t __a
, int32x4_t __b
)
23137 return (int32x4_t
) __builtin_aarch64_srshlv4si (__a
, __b
);
23140 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23141 vrshlq_s64 (int64x2_t __a
, int64x2_t __b
)
23143 return (int64x2_t
) __builtin_aarch64_srshlv2di (__a
, __b
);
23146 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23147 vrshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
23149 return (uint8x16_t
) __builtin_aarch64_urshlv16qi ((int8x16_t
) __a
, __b
);
23152 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23153 vrshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
23155 return (uint16x8_t
) __builtin_aarch64_urshlv8hi ((int16x8_t
) __a
, __b
);
23158 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23159 vrshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
23161 return (uint32x4_t
) __builtin_aarch64_urshlv4si ((int32x4_t
) __a
, __b
);
23164 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23165 vrshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
23167 return (uint64x2_t
) __builtin_aarch64_urshlv2di ((int64x2_t
) __a
, __b
);
23170 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23171 vrshld_s64 (int64x1_t __a
, int64x1_t __b
)
23173 return (int64x1_t
) __builtin_aarch64_srshldi (__a
, __b
);
23176 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23177 vrshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
23179 return (uint64x1_t
) __builtin_aarch64_urshldi (__a
, __b
);
23184 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23185 vrshr_n_s8 (int8x8_t __a
, const int __b
)
23187 return (int8x8_t
) __builtin_aarch64_srshr_nv8qi (__a
, __b
);
23190 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23191 vrshr_n_s16 (int16x4_t __a
, const int __b
)
23193 return (int16x4_t
) __builtin_aarch64_srshr_nv4hi (__a
, __b
);
23196 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23197 vrshr_n_s32 (int32x2_t __a
, const int __b
)
23199 return (int32x2_t
) __builtin_aarch64_srshr_nv2si (__a
, __b
);
23202 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23203 vrshr_n_s64 (int64x1_t __a
, const int __b
)
23205 return (int64x1_t
) __builtin_aarch64_srshr_ndi (__a
, __b
);
23208 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23209 vrshr_n_u8 (uint8x8_t __a
, const int __b
)
23211 return (uint8x8_t
) __builtin_aarch64_urshr_nv8qi ((int8x8_t
) __a
, __b
);
23214 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23215 vrshr_n_u16 (uint16x4_t __a
, const int __b
)
23217 return (uint16x4_t
) __builtin_aarch64_urshr_nv4hi ((int16x4_t
) __a
, __b
);
23220 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23221 vrshr_n_u32 (uint32x2_t __a
, const int __b
)
23223 return (uint32x2_t
) __builtin_aarch64_urshr_nv2si ((int32x2_t
) __a
, __b
);
23226 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23227 vrshr_n_u64 (uint64x1_t __a
, const int __b
)
23229 return (uint64x1_t
) __builtin_aarch64_urshr_ndi ((int64x1_t
) __a
, __b
);
23232 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23233 vrshrq_n_s8 (int8x16_t __a
, const int __b
)
23235 return (int8x16_t
) __builtin_aarch64_srshr_nv16qi (__a
, __b
);
23238 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23239 vrshrq_n_s16 (int16x8_t __a
, const int __b
)
23241 return (int16x8_t
) __builtin_aarch64_srshr_nv8hi (__a
, __b
);
23244 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23245 vrshrq_n_s32 (int32x4_t __a
, const int __b
)
23247 return (int32x4_t
) __builtin_aarch64_srshr_nv4si (__a
, __b
);
23250 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23251 vrshrq_n_s64 (int64x2_t __a
, const int __b
)
23253 return (int64x2_t
) __builtin_aarch64_srshr_nv2di (__a
, __b
);
23256 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23257 vrshrq_n_u8 (uint8x16_t __a
, const int __b
)
23259 return (uint8x16_t
) __builtin_aarch64_urshr_nv16qi ((int8x16_t
) __a
, __b
);
23262 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23263 vrshrq_n_u16 (uint16x8_t __a
, const int __b
)
23265 return (uint16x8_t
) __builtin_aarch64_urshr_nv8hi ((int16x8_t
) __a
, __b
);
23268 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23269 vrshrq_n_u32 (uint32x4_t __a
, const int __b
)
23271 return (uint32x4_t
) __builtin_aarch64_urshr_nv4si ((int32x4_t
) __a
, __b
);
23274 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23275 vrshrq_n_u64 (uint64x2_t __a
, const int __b
)
23277 return (uint64x2_t
) __builtin_aarch64_urshr_nv2di ((int64x2_t
) __a
, __b
);
23280 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23281 vrshrd_n_s64 (int64x1_t __a
, const int __b
)
23283 return (int64x1_t
) __builtin_aarch64_srshr_ndi (__a
, __b
);
23286 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23287 vrshrd_n_u64 (uint64x1_t __a
, const int __b
)
23289 return (uint64x1_t
) __builtin_aarch64_urshr_ndi (__a
, __b
);
23294 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23295 vrsra_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
23297 return (int8x8_t
) __builtin_aarch64_srsra_nv8qi (__a
, __b
, __c
);
23300 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23301 vrsra_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
23303 return (int16x4_t
) __builtin_aarch64_srsra_nv4hi (__a
, __b
, __c
);
23306 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23307 vrsra_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
23309 return (int32x2_t
) __builtin_aarch64_srsra_nv2si (__a
, __b
, __c
);
23312 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23313 vrsra_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23315 return (int64x1_t
) __builtin_aarch64_srsra_ndi (__a
, __b
, __c
);
23318 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23319 vrsra_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
23321 return (uint8x8_t
) __builtin_aarch64_ursra_nv8qi ((int8x8_t
) __a
,
23322 (int8x8_t
) __b
, __c
);
23325 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23326 vrsra_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
23328 return (uint16x4_t
) __builtin_aarch64_ursra_nv4hi ((int16x4_t
) __a
,
23329 (int16x4_t
) __b
, __c
);
23332 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23333 vrsra_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
23335 return (uint32x2_t
) __builtin_aarch64_ursra_nv2si ((int32x2_t
) __a
,
23336 (int32x2_t
) __b
, __c
);
23339 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23340 vrsra_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23342 return (uint64x1_t
) __builtin_aarch64_ursra_ndi ((int64x1_t
) __a
,
23343 (int64x1_t
) __b
, __c
);
23346 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23347 vrsraq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
23349 return (int8x16_t
) __builtin_aarch64_srsra_nv16qi (__a
, __b
, __c
);
23352 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23353 vrsraq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
23355 return (int16x8_t
) __builtin_aarch64_srsra_nv8hi (__a
, __b
, __c
);
23358 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23359 vrsraq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
23361 return (int32x4_t
) __builtin_aarch64_srsra_nv4si (__a
, __b
, __c
);
23364 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23365 vrsraq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
23367 return (int64x2_t
) __builtin_aarch64_srsra_nv2di (__a
, __b
, __c
);
23370 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23371 vrsraq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
23373 return (uint8x16_t
) __builtin_aarch64_ursra_nv16qi ((int8x16_t
) __a
,
23374 (int8x16_t
) __b
, __c
);
23377 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23378 vrsraq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
23380 return (uint16x8_t
) __builtin_aarch64_ursra_nv8hi ((int16x8_t
) __a
,
23381 (int16x8_t
) __b
, __c
);
23384 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23385 vrsraq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
23387 return (uint32x4_t
) __builtin_aarch64_ursra_nv4si ((int32x4_t
) __a
,
23388 (int32x4_t
) __b
, __c
);
23391 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23392 vrsraq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
23394 return (uint64x2_t
) __builtin_aarch64_ursra_nv2di ((int64x2_t
) __a
,
23395 (int64x2_t
) __b
, __c
);
23398 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23399 vrsrad_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23401 return (int64x1_t
) __builtin_aarch64_srsra_ndi (__a
, __b
, __c
);
23404 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23405 vrsrad_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23407 return (uint64x1_t
) __builtin_aarch64_ursra_ndi (__a
, __b
, __c
);
23412 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23413 vshl_n_s8 (int8x8_t __a
, const int __b
)
23415 return (int8x8_t
) __builtin_aarch64_ashlv8qi (__a
, __b
);
23418 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23419 vshl_n_s16 (int16x4_t __a
, const int __b
)
23421 return (int16x4_t
) __builtin_aarch64_ashlv4hi (__a
, __b
);
23424 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23425 vshl_n_s32 (int32x2_t __a
, const int __b
)
23427 return (int32x2_t
) __builtin_aarch64_ashlv2si (__a
, __b
);
23430 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23431 vshl_n_s64 (int64x1_t __a
, const int __b
)
23433 return (int64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23436 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23437 vshl_n_u8 (uint8x8_t __a
, const int __b
)
23439 return (uint8x8_t
) __builtin_aarch64_ashlv8qi ((int8x8_t
) __a
, __b
);
23442 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23443 vshl_n_u16 (uint16x4_t __a
, const int __b
)
23445 return (uint16x4_t
) __builtin_aarch64_ashlv4hi ((int16x4_t
) __a
, __b
);
23448 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23449 vshl_n_u32 (uint32x2_t __a
, const int __b
)
23451 return (uint32x2_t
) __builtin_aarch64_ashlv2si ((int32x2_t
) __a
, __b
);
23454 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23455 vshl_n_u64 (uint64x1_t __a
, const int __b
)
23457 return (uint64x1_t
) __builtin_aarch64_ashldi ((int64x1_t
) __a
, __b
);
23460 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23461 vshlq_n_s8 (int8x16_t __a
, const int __b
)
23463 return (int8x16_t
) __builtin_aarch64_ashlv16qi (__a
, __b
);
23466 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23467 vshlq_n_s16 (int16x8_t __a
, const int __b
)
23469 return (int16x8_t
) __builtin_aarch64_ashlv8hi (__a
, __b
);
23472 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23473 vshlq_n_s32 (int32x4_t __a
, const int __b
)
23475 return (int32x4_t
) __builtin_aarch64_ashlv4si (__a
, __b
);
23478 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23479 vshlq_n_s64 (int64x2_t __a
, const int __b
)
23481 return (int64x2_t
) __builtin_aarch64_ashlv2di (__a
, __b
);
23484 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23485 vshlq_n_u8 (uint8x16_t __a
, const int __b
)
23487 return (uint8x16_t
) __builtin_aarch64_ashlv16qi ((int8x16_t
) __a
, __b
);
23490 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23491 vshlq_n_u16 (uint16x8_t __a
, const int __b
)
23493 return (uint16x8_t
) __builtin_aarch64_ashlv8hi ((int16x8_t
) __a
, __b
);
23496 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23497 vshlq_n_u32 (uint32x4_t __a
, const int __b
)
23499 return (uint32x4_t
) __builtin_aarch64_ashlv4si ((int32x4_t
) __a
, __b
);
23502 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23503 vshlq_n_u64 (uint64x2_t __a
, const int __b
)
23505 return (uint64x2_t
) __builtin_aarch64_ashlv2di ((int64x2_t
) __a
, __b
);
23508 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23509 vshld_n_s64 (int64x1_t __a
, const int __b
)
23511 return (int64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23514 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23515 vshld_n_u64 (uint64x1_t __a
, const int __b
)
23517 return (uint64x1_t
) __builtin_aarch64_ashldi (__a
, __b
);
23520 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23521 vshl_s8 (int8x8_t __a
, int8x8_t __b
)
23523 return (int8x8_t
) __builtin_aarch64_sshlv8qi (__a
, __b
);
23526 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23527 vshl_s16 (int16x4_t __a
, int16x4_t __b
)
23529 return (int16x4_t
) __builtin_aarch64_sshlv4hi (__a
, __b
);
23532 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23533 vshl_s32 (int32x2_t __a
, int32x2_t __b
)
23535 return (int32x2_t
) __builtin_aarch64_sshlv2si (__a
, __b
);
23538 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23539 vshl_s64 (int64x1_t __a
, int64x1_t __b
)
23541 return (int64x1_t
) __builtin_aarch64_sshldi (__a
, __b
);
23544 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23545 vshl_u8 (uint8x8_t __a
, int8x8_t __b
)
23547 return (uint8x8_t
) __builtin_aarch64_ushlv8qi ((int8x8_t
) __a
, __b
);
23550 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23551 vshl_u16 (uint16x4_t __a
, int16x4_t __b
)
23553 return (uint16x4_t
) __builtin_aarch64_ushlv4hi ((int16x4_t
) __a
, __b
);
23556 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23557 vshl_u32 (uint32x2_t __a
, int32x2_t __b
)
23559 return (uint32x2_t
) __builtin_aarch64_ushlv2si ((int32x2_t
) __a
, __b
);
23562 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23563 vshl_u64 (uint64x1_t __a
, int64x1_t __b
)
23565 return (uint64x1_t
) __builtin_aarch64_ushldi ((int64x1_t
) __a
, __b
);
23568 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23569 vshlq_s8 (int8x16_t __a
, int8x16_t __b
)
23571 return (int8x16_t
) __builtin_aarch64_sshlv16qi (__a
, __b
);
23574 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23575 vshlq_s16 (int16x8_t __a
, int16x8_t __b
)
23577 return (int16x8_t
) __builtin_aarch64_sshlv8hi (__a
, __b
);
23580 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23581 vshlq_s32 (int32x4_t __a
, int32x4_t __b
)
23583 return (int32x4_t
) __builtin_aarch64_sshlv4si (__a
, __b
);
23586 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23587 vshlq_s64 (int64x2_t __a
, int64x2_t __b
)
23589 return (int64x2_t
) __builtin_aarch64_sshlv2di (__a
, __b
);
23592 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23593 vshlq_u8 (uint8x16_t __a
, int8x16_t __b
)
23595 return (uint8x16_t
) __builtin_aarch64_ushlv16qi ((int8x16_t
) __a
, __b
);
23598 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23599 vshlq_u16 (uint16x8_t __a
, int16x8_t __b
)
23601 return (uint16x8_t
) __builtin_aarch64_ushlv8hi ((int16x8_t
) __a
, __b
);
23604 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23605 vshlq_u32 (uint32x4_t __a
, int32x4_t __b
)
23607 return (uint32x4_t
) __builtin_aarch64_ushlv4si ((int32x4_t
) __a
, __b
);
23610 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23611 vshlq_u64 (uint64x2_t __a
, int64x2_t __b
)
23613 return (uint64x2_t
) __builtin_aarch64_ushlv2di ((int64x2_t
) __a
, __b
);
23616 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23617 vshld_s64 (int64x1_t __a
, int64x1_t __b
)
23619 return (int64x1_t
) __builtin_aarch64_sshldi (__a
, __b
);
23622 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23623 vshld_u64 (uint64x1_t __a
, uint64x1_t __b
)
23625 return (uint64x1_t
) __builtin_aarch64_ushldi (__a
, __b
);
23628 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23629 vshll_high_n_s8 (int8x16_t __a
, const int __b
)
23631 return __builtin_aarch64_sshll2_nv16qi (__a
, __b
);
23634 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23635 vshll_high_n_s16 (int16x8_t __a
, const int __b
)
23637 return __builtin_aarch64_sshll2_nv8hi (__a
, __b
);
23640 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23641 vshll_high_n_s32 (int32x4_t __a
, const int __b
)
23643 return __builtin_aarch64_sshll2_nv4si (__a
, __b
);
23646 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23647 vshll_high_n_u8 (uint8x16_t __a
, const int __b
)
23649 return (uint16x8_t
) __builtin_aarch64_ushll2_nv16qi ((int8x16_t
) __a
, __b
);
23652 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23653 vshll_high_n_u16 (uint16x8_t __a
, const int __b
)
23655 return (uint32x4_t
) __builtin_aarch64_ushll2_nv8hi ((int16x8_t
) __a
, __b
);
23658 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23659 vshll_high_n_u32 (uint32x4_t __a
, const int __b
)
23661 return (uint64x2_t
) __builtin_aarch64_ushll2_nv4si ((int32x4_t
) __a
, __b
);
23664 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23665 vshll_n_s8 (int8x8_t __a
, const int __b
)
23667 return __builtin_aarch64_sshll_nv8qi (__a
, __b
);
23670 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23671 vshll_n_s16 (int16x4_t __a
, const int __b
)
23673 return __builtin_aarch64_sshll_nv4hi (__a
, __b
);
23676 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23677 vshll_n_s32 (int32x2_t __a
, const int __b
)
23679 return __builtin_aarch64_sshll_nv2si (__a
, __b
);
23682 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23683 vshll_n_u8 (uint8x8_t __a
, const int __b
)
23685 return (uint16x8_t
) __builtin_aarch64_ushll_nv8qi ((int8x8_t
) __a
, __b
);
23688 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23689 vshll_n_u16 (uint16x4_t __a
, const int __b
)
23691 return (uint32x4_t
) __builtin_aarch64_ushll_nv4hi ((int16x4_t
) __a
, __b
);
23694 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23695 vshll_n_u32 (uint32x2_t __a
, const int __b
)
23697 return (uint64x2_t
) __builtin_aarch64_ushll_nv2si ((int32x2_t
) __a
, __b
);
23702 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23703 vshr_n_s8 (int8x8_t __a
, const int __b
)
23705 return (int8x8_t
) __builtin_aarch64_ashrv8qi (__a
, __b
);
23708 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23709 vshr_n_s16 (int16x4_t __a
, const int __b
)
23711 return (int16x4_t
) __builtin_aarch64_ashrv4hi (__a
, __b
);
23714 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23715 vshr_n_s32 (int32x2_t __a
, const int __b
)
23717 return (int32x2_t
) __builtin_aarch64_ashrv2si (__a
, __b
);
23720 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23721 vshr_n_s64 (int64x1_t __a
, const int __b
)
23723 return (int64x1_t
) __builtin_aarch64_ashrdi (__a
, __b
);
23726 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23727 vshr_n_u8 (uint8x8_t __a
, const int __b
)
23729 return (uint8x8_t
) __builtin_aarch64_lshrv8qi ((int8x8_t
) __a
, __b
);
23732 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23733 vshr_n_u16 (uint16x4_t __a
, const int __b
)
23735 return (uint16x4_t
) __builtin_aarch64_lshrv4hi ((int16x4_t
) __a
, __b
);
23738 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23739 vshr_n_u32 (uint32x2_t __a
, const int __b
)
23741 return (uint32x2_t
) __builtin_aarch64_lshrv2si ((int32x2_t
) __a
, __b
);
23744 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23745 vshr_n_u64 (uint64x1_t __a
, const int __b
)
23747 return (uint64x1_t
) __builtin_aarch64_lshrdi ((int64x1_t
) __a
, __b
);
23750 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23751 vshrq_n_s8 (int8x16_t __a
, const int __b
)
23753 return (int8x16_t
) __builtin_aarch64_ashrv16qi (__a
, __b
);
23756 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23757 vshrq_n_s16 (int16x8_t __a
, const int __b
)
23759 return (int16x8_t
) __builtin_aarch64_ashrv8hi (__a
, __b
);
23762 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23763 vshrq_n_s32 (int32x4_t __a
, const int __b
)
23765 return (int32x4_t
) __builtin_aarch64_ashrv4si (__a
, __b
);
23768 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23769 vshrq_n_s64 (int64x2_t __a
, const int __b
)
23771 return (int64x2_t
) __builtin_aarch64_ashrv2di (__a
, __b
);
23774 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23775 vshrq_n_u8 (uint8x16_t __a
, const int __b
)
23777 return (uint8x16_t
) __builtin_aarch64_lshrv16qi ((int8x16_t
) __a
, __b
);
23780 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23781 vshrq_n_u16 (uint16x8_t __a
, const int __b
)
23783 return (uint16x8_t
) __builtin_aarch64_lshrv8hi ((int16x8_t
) __a
, __b
);
23786 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23787 vshrq_n_u32 (uint32x4_t __a
, const int __b
)
23789 return (uint32x4_t
) __builtin_aarch64_lshrv4si ((int32x4_t
) __a
, __b
);
23792 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23793 vshrq_n_u64 (uint64x2_t __a
, const int __b
)
23795 return (uint64x2_t
) __builtin_aarch64_lshrv2di ((int64x2_t
) __a
, __b
);
23798 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23799 vshrd_n_s64 (int64x1_t __a
, const int __b
)
23801 return (int64x1_t
) __builtin_aarch64_ashrdi (__a
, __b
);
23804 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23805 vshrd_n_u64 (uint64x1_t __a
, const int __b
)
23807 return (uint64x1_t
) __builtin_aarch64_lshrdi (__a
, __b
);
23812 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
23813 vsli_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
23815 return (int8x8_t
) __builtin_aarch64_ssli_nv8qi (__a
, __b
, __c
);
23818 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
23819 vsli_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
23821 return (int16x4_t
) __builtin_aarch64_ssli_nv4hi (__a
, __b
, __c
);
23824 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
23825 vsli_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
23827 return (int32x2_t
) __builtin_aarch64_ssli_nv2si (__a
, __b
, __c
);
23830 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23831 vsli_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23833 return (int64x1_t
) __builtin_aarch64_ssli_ndi (__a
, __b
, __c
);
23836 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23837 vsli_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
23839 return (uint8x8_t
) __builtin_aarch64_usli_nv8qi ((int8x8_t
) __a
,
23840 (int8x8_t
) __b
, __c
);
23843 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23844 vsli_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
23846 return (uint16x4_t
) __builtin_aarch64_usli_nv4hi ((int16x4_t
) __a
,
23847 (int16x4_t
) __b
, __c
);
23850 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23851 vsli_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
23853 return (uint32x2_t
) __builtin_aarch64_usli_nv2si ((int32x2_t
) __a
,
23854 (int32x2_t
) __b
, __c
);
23857 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23858 vsli_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23860 return (uint64x1_t
) __builtin_aarch64_usli_ndi ((int64x1_t
) __a
,
23861 (int64x1_t
) __b
, __c
);
23864 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
23865 vsliq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
23867 return (int8x16_t
) __builtin_aarch64_ssli_nv16qi (__a
, __b
, __c
);
23870 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
23871 vsliq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
23873 return (int16x8_t
) __builtin_aarch64_ssli_nv8hi (__a
, __b
, __c
);
23876 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
23877 vsliq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
23879 return (int32x4_t
) __builtin_aarch64_ssli_nv4si (__a
, __b
, __c
);
23882 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
23883 vsliq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
23885 return (int64x2_t
) __builtin_aarch64_ssli_nv2di (__a
, __b
, __c
);
23888 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23889 vsliq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
23891 return (uint8x16_t
) __builtin_aarch64_usli_nv16qi ((int8x16_t
) __a
,
23892 (int8x16_t
) __b
, __c
);
23895 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23896 vsliq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
23898 return (uint16x8_t
) __builtin_aarch64_usli_nv8hi ((int16x8_t
) __a
,
23899 (int16x8_t
) __b
, __c
);
23902 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23903 vsliq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
23905 return (uint32x4_t
) __builtin_aarch64_usli_nv4si ((int32x4_t
) __a
,
23906 (int32x4_t
) __b
, __c
);
23909 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23910 vsliq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
23912 return (uint64x2_t
) __builtin_aarch64_usli_nv2di ((int64x2_t
) __a
,
23913 (int64x2_t
) __b
, __c
);
23916 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
23917 vslid_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
23919 return (int64x1_t
) __builtin_aarch64_ssli_ndi (__a
, __b
, __c
);
23922 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23923 vslid_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
23925 return (uint64x1_t
) __builtin_aarch64_usli_ndi (__a
, __b
, __c
);
23930 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
23931 vsqadd_u8 (uint8x8_t __a
, int8x8_t __b
)
23933 return (uint8x8_t
) __builtin_aarch64_usqaddv8qi ((int8x8_t
) __a
,
23937 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
23938 vsqadd_u16 (uint16x4_t __a
, int16x4_t __b
)
23940 return (uint16x4_t
) __builtin_aarch64_usqaddv4hi ((int16x4_t
) __a
,
23944 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
23945 vsqadd_u32 (uint32x2_t __a
, int32x2_t __b
)
23947 return (uint32x2_t
) __builtin_aarch64_usqaddv2si ((int32x2_t
) __a
,
23951 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
23952 vsqadd_u64 (uint64x1_t __a
, int64x1_t __b
)
23954 return (uint64x1_t
) __builtin_aarch64_usqadddi ((int64x1_t
) __a
, __b
);
23957 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
23958 vsqaddq_u8 (uint8x16_t __a
, int8x16_t __b
)
23960 return (uint8x16_t
) __builtin_aarch64_usqaddv16qi ((int8x16_t
) __a
,
23964 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
23965 vsqaddq_u16 (uint16x8_t __a
, int16x8_t __b
)
23967 return (uint16x8_t
) __builtin_aarch64_usqaddv8hi ((int16x8_t
) __a
,
23971 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
23972 vsqaddq_u32 (uint32x4_t __a
, int32x4_t __b
)
23974 return (uint32x4_t
) __builtin_aarch64_usqaddv4si ((int32x4_t
) __a
,
23978 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
23979 vsqaddq_u64 (uint64x2_t __a
, int64x2_t __b
)
23981 return (uint64x2_t
) __builtin_aarch64_usqaddv2di ((int64x2_t
) __a
,
23985 __extension__
static __inline uint8x1_t
__attribute__ ((__always_inline__
))
23986 vsqaddb_u8 (uint8x1_t __a
, int8x1_t __b
)
23988 return (uint8x1_t
) __builtin_aarch64_usqaddqi ((int8x1_t
) __a
, __b
);
23991 __extension__
static __inline uint16x1_t
__attribute__ ((__always_inline__
))
23992 vsqaddh_u16 (uint16x1_t __a
, int16x1_t __b
)
23994 return (uint16x1_t
) __builtin_aarch64_usqaddhi ((int16x1_t
) __a
, __b
);
23997 __extension__
static __inline uint32x1_t
__attribute__ ((__always_inline__
))
23998 vsqadds_u32 (uint32x1_t __a
, int32x1_t __b
)
24000 return (uint32x1_t
) __builtin_aarch64_usqaddsi ((int32x1_t
) __a
, __b
);
24003 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24004 vsqaddd_u64 (uint64x1_t __a
, int64x1_t __b
)
24006 return (uint64x1_t
) __builtin_aarch64_usqadddi ((int64x1_t
) __a
, __b
);
24010 __extension__
static __inline float32x2_t
__attribute__ ((__always_inline__
))
24011 vsqrt_f32 (float32x2_t a
)
24013 return __builtin_aarch64_sqrtv2sf (a
);
24016 __extension__
static __inline float32x4_t
__attribute__ ((__always_inline__
))
24017 vsqrtq_f32 (float32x4_t a
)
24019 return __builtin_aarch64_sqrtv4sf (a
);
24022 __extension__
static __inline float64x2_t
__attribute__ ((__always_inline__
))
24023 vsqrtq_f64 (float64x2_t a
)
24025 return __builtin_aarch64_sqrtv2df (a
);
24030 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
24031 vsra_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
24033 return (int8x8_t
) __builtin_aarch64_ssra_nv8qi (__a
, __b
, __c
);
24036 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
24037 vsra_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
24039 return (int16x4_t
) __builtin_aarch64_ssra_nv4hi (__a
, __b
, __c
);
24042 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
24043 vsra_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
24045 return (int32x2_t
) __builtin_aarch64_ssra_nv2si (__a
, __b
, __c
);
24048 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24049 vsra_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24051 return (int64x1_t
) __builtin_aarch64_ssra_ndi (__a
, __b
, __c
);
24054 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
24055 vsra_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
24057 return (uint8x8_t
) __builtin_aarch64_usra_nv8qi ((int8x8_t
) __a
,
24058 (int8x8_t
) __b
, __c
);
24061 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
24062 vsra_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
24064 return (uint16x4_t
) __builtin_aarch64_usra_nv4hi ((int16x4_t
) __a
,
24065 (int16x4_t
) __b
, __c
);
24068 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
24069 vsra_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
24071 return (uint32x2_t
) __builtin_aarch64_usra_nv2si ((int32x2_t
) __a
,
24072 (int32x2_t
) __b
, __c
);
24075 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24076 vsra_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24078 return (uint64x1_t
) __builtin_aarch64_usra_ndi ((int64x1_t
) __a
,
24079 (int64x1_t
) __b
, __c
);
24082 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
24083 vsraq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
24085 return (int8x16_t
) __builtin_aarch64_ssra_nv16qi (__a
, __b
, __c
);
24088 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
24089 vsraq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
24091 return (int16x8_t
) __builtin_aarch64_ssra_nv8hi (__a
, __b
, __c
);
24094 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
24095 vsraq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
24097 return (int32x4_t
) __builtin_aarch64_ssra_nv4si (__a
, __b
, __c
);
24100 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
24101 vsraq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
24103 return (int64x2_t
) __builtin_aarch64_ssra_nv2di (__a
, __b
, __c
);
24106 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
24107 vsraq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
24109 return (uint8x16_t
) __builtin_aarch64_usra_nv16qi ((int8x16_t
) __a
,
24110 (int8x16_t
) __b
, __c
);
24113 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
24114 vsraq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
24116 return (uint16x8_t
) __builtin_aarch64_usra_nv8hi ((int16x8_t
) __a
,
24117 (int16x8_t
) __b
, __c
);
24120 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
24121 vsraq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
24123 return (uint32x4_t
) __builtin_aarch64_usra_nv4si ((int32x4_t
) __a
,
24124 (int32x4_t
) __b
, __c
);
24127 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
24128 vsraq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
24130 return (uint64x2_t
) __builtin_aarch64_usra_nv2di ((int64x2_t
) __a
,
24131 (int64x2_t
) __b
, __c
);
24134 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24135 vsrad_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24137 return (int64x1_t
) __builtin_aarch64_ssra_ndi (__a
, __b
, __c
);
24140 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24141 vsrad_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24143 return (uint64x1_t
) __builtin_aarch64_usra_ndi (__a
, __b
, __c
);
24148 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
24149 vsri_n_s8 (int8x8_t __a
, int8x8_t __b
, const int __c
)
24151 return (int8x8_t
) __builtin_aarch64_ssri_nv8qi (__a
, __b
, __c
);
24154 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
24155 vsri_n_s16 (int16x4_t __a
, int16x4_t __b
, const int __c
)
24157 return (int16x4_t
) __builtin_aarch64_ssri_nv4hi (__a
, __b
, __c
);
24160 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
24161 vsri_n_s32 (int32x2_t __a
, int32x2_t __b
, const int __c
)
24163 return (int32x2_t
) __builtin_aarch64_ssri_nv2si (__a
, __b
, __c
);
24166 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24167 vsri_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24169 return (int64x1_t
) __builtin_aarch64_ssri_ndi (__a
, __b
, __c
);
24172 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
24173 vsri_n_u8 (uint8x8_t __a
, uint8x8_t __b
, const int __c
)
24175 return (uint8x8_t
) __builtin_aarch64_usri_nv8qi ((int8x8_t
) __a
,
24176 (int8x8_t
) __b
, __c
);
24179 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
24180 vsri_n_u16 (uint16x4_t __a
, uint16x4_t __b
, const int __c
)
24182 return (uint16x4_t
) __builtin_aarch64_usri_nv4hi ((int16x4_t
) __a
,
24183 (int16x4_t
) __b
, __c
);
24186 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
24187 vsri_n_u32 (uint32x2_t __a
, uint32x2_t __b
, const int __c
)
24189 return (uint32x2_t
) __builtin_aarch64_usri_nv2si ((int32x2_t
) __a
,
24190 (int32x2_t
) __b
, __c
);
24193 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24194 vsri_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24196 return (uint64x1_t
) __builtin_aarch64_usri_ndi ((int64x1_t
) __a
,
24197 (int64x1_t
) __b
, __c
);
24200 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
24201 vsriq_n_s8 (int8x16_t __a
, int8x16_t __b
, const int __c
)
24203 return (int8x16_t
) __builtin_aarch64_ssri_nv16qi (__a
, __b
, __c
);
24206 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
24207 vsriq_n_s16 (int16x8_t __a
, int16x8_t __b
, const int __c
)
24209 return (int16x8_t
) __builtin_aarch64_ssri_nv8hi (__a
, __b
, __c
);
24212 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
24213 vsriq_n_s32 (int32x4_t __a
, int32x4_t __b
, const int __c
)
24215 return (int32x4_t
) __builtin_aarch64_ssri_nv4si (__a
, __b
, __c
);
24218 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
24219 vsriq_n_s64 (int64x2_t __a
, int64x2_t __b
, const int __c
)
24221 return (int64x2_t
) __builtin_aarch64_ssri_nv2di (__a
, __b
, __c
);
24224 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
24225 vsriq_n_u8 (uint8x16_t __a
, uint8x16_t __b
, const int __c
)
24227 return (uint8x16_t
) __builtin_aarch64_usri_nv16qi ((int8x16_t
) __a
,
24228 (int8x16_t
) __b
, __c
);
24231 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
24232 vsriq_n_u16 (uint16x8_t __a
, uint16x8_t __b
, const int __c
)
24234 return (uint16x8_t
) __builtin_aarch64_usri_nv8hi ((int16x8_t
) __a
,
24235 (int16x8_t
) __b
, __c
);
24238 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
24239 vsriq_n_u32 (uint32x4_t __a
, uint32x4_t __b
, const int __c
)
24241 return (uint32x4_t
) __builtin_aarch64_usri_nv4si ((int32x4_t
) __a
,
24242 (int32x4_t
) __b
, __c
);
24245 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
24246 vsriq_n_u64 (uint64x2_t __a
, uint64x2_t __b
, const int __c
)
24248 return (uint64x2_t
) __builtin_aarch64_usri_nv2di ((int64x2_t
) __a
,
24249 (int64x2_t
) __b
, __c
);
24252 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
24253 vsrid_n_s64 (int64x1_t __a
, int64x1_t __b
, const int __c
)
24255 return (int64x1_t
) __builtin_aarch64_ssri_ndi (__a
, __b
, __c
);
24258 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
24259 vsrid_n_u64 (uint64x1_t __a
, uint64x1_t __b
, const int __c
)
24261 return (uint64x1_t
) __builtin_aarch64_usri_ndi (__a
, __b
, __c
);
24266 __extension__
static __inline
void
24267 vst2_s64 (int64_t * __a
, int64x1x2_t val
)
24269 __builtin_aarch64_simd_oi __o
;
24271 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24272 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24273 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24274 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24275 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24278 __extension__
static __inline
void
24279 vst2_u64 (uint64_t * __a
, uint64x1x2_t val
)
24281 __builtin_aarch64_simd_oi __o
;
24283 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24284 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24285 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24286 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24287 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24290 __extension__
static __inline
void
24291 vst2_f64 (float64_t
* __a
, float64x1x2_t val
)
24293 __builtin_aarch64_simd_oi __o
;
24294 float64x2x2_t temp
;
24295 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24296 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24297 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24298 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24299 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24302 __extension__
static __inline
void
24303 vst2_s8 (int8_t * __a
, int8x8x2_t val
)
24305 __builtin_aarch64_simd_oi __o
;
24307 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
24308 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
24309 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24310 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24311 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24314 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24315 vst2_p8 (poly8_t
* __a
, poly8x8x2_t val
)
24317 __builtin_aarch64_simd_oi __o
;
24319 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
24320 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
24321 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24322 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24323 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24326 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24327 vst2_s16 (int16_t * __a
, int16x4x2_t val
)
24329 __builtin_aarch64_simd_oi __o
;
24331 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
24332 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
24333 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24334 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24335 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24338 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24339 vst2_p16 (poly16_t
* __a
, poly16x4x2_t val
)
24341 __builtin_aarch64_simd_oi __o
;
24343 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
24344 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
24345 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24346 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24347 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24350 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24351 vst2_s32 (int32_t * __a
, int32x2x2_t val
)
24353 __builtin_aarch64_simd_oi __o
;
24355 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
24356 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
24357 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24358 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24359 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24362 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24363 vst2_u8 (uint8_t * __a
, uint8x8x2_t val
)
24365 __builtin_aarch64_simd_oi __o
;
24367 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
24368 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
24369 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24370 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24371 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24374 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24375 vst2_u16 (uint16_t * __a
, uint16x4x2_t val
)
24377 __builtin_aarch64_simd_oi __o
;
24379 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
24380 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
24381 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24382 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24383 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24386 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24387 vst2_u32 (uint32_t * __a
, uint32x2x2_t val
)
24389 __builtin_aarch64_simd_oi __o
;
24391 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
24392 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
24393 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24394 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24395 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24398 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24399 vst2_f32 (float32_t
* __a
, float32x2x2_t val
)
24401 __builtin_aarch64_simd_oi __o
;
24402 float32x4x2_t temp
;
24403 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
24404 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
24405 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
24406 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
24407 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24410 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24411 vst2q_s8 (int8_t * __a
, int8x16x2_t val
)
24413 __builtin_aarch64_simd_oi __o
;
24414 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24415 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24416 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24419 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24420 vst2q_p8 (poly8_t
* __a
, poly8x16x2_t val
)
24422 __builtin_aarch64_simd_oi __o
;
24423 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24424 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24425 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24428 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24429 vst2q_s16 (int16_t * __a
, int16x8x2_t val
)
24431 __builtin_aarch64_simd_oi __o
;
24432 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24433 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24434 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24437 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24438 vst2q_p16 (poly16_t
* __a
, poly16x8x2_t val
)
24440 __builtin_aarch64_simd_oi __o
;
24441 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24442 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24443 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24446 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24447 vst2q_s32 (int32_t * __a
, int32x4x2_t val
)
24449 __builtin_aarch64_simd_oi __o
;
24450 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24451 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24452 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24455 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24456 vst2q_s64 (int64_t * __a
, int64x2x2_t val
)
24458 __builtin_aarch64_simd_oi __o
;
24459 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24460 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24461 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24464 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24465 vst2q_u8 (uint8_t * __a
, uint8x16x2_t val
)
24467 __builtin_aarch64_simd_oi __o
;
24468 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24469 __o
= __builtin_aarch64_set_qregoiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24470 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24473 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24474 vst2q_u16 (uint16_t * __a
, uint16x8x2_t val
)
24476 __builtin_aarch64_simd_oi __o
;
24477 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24478 __o
= __builtin_aarch64_set_qregoiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24479 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24482 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24483 vst2q_u32 (uint32_t * __a
, uint32x4x2_t val
)
24485 __builtin_aarch64_simd_oi __o
;
24486 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24487 __o
= __builtin_aarch64_set_qregoiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24488 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24491 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24492 vst2q_u64 (uint64_t * __a
, uint64x2x2_t val
)
24494 __builtin_aarch64_simd_oi __o
;
24495 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24496 __o
= __builtin_aarch64_set_qregoiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24497 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24500 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24501 vst2q_f32 (float32_t
* __a
, float32x4x2_t val
)
24503 __builtin_aarch64_simd_oi __o
;
24504 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
24505 __o
= __builtin_aarch64_set_qregoiv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
24506 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24509 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24510 vst2q_f64 (float64_t
* __a
, float64x2x2_t val
)
24512 __builtin_aarch64_simd_oi __o
;
24513 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) val
.val
[0], 0);
24514 __o
= __builtin_aarch64_set_qregoiv2df (__o
, (float64x2_t
) val
.val
[1], 1);
24515 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24518 __extension__
static __inline
void
24519 vst3_s64 (int64_t * __a
, int64x1x3_t val
)
24521 __builtin_aarch64_simd_ci __o
;
24523 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24524 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24525 temp
.val
[2] = vcombine_s64 (val
.val
[2], vcreate_s64 (INT64_C (0)));
24526 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24527 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24528 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24529 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24532 __extension__
static __inline
void
24533 vst3_u64 (uint64_t * __a
, uint64x1x3_t val
)
24535 __builtin_aarch64_simd_ci __o
;
24537 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24538 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24539 temp
.val
[2] = vcombine_u64 (val
.val
[2], vcreate_u64 (UINT64_C (0)));
24540 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24541 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24542 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24543 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24546 __extension__
static __inline
void
24547 vst3_f64 (float64_t
* __a
, float64x1x3_t val
)
24549 __builtin_aarch64_simd_ci __o
;
24550 float64x2x3_t temp
;
24551 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24552 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24553 temp
.val
[2] = vcombine_f64 (val
.val
[2], vcreate_f64 (UINT64_C (0)));
24554 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24555 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24556 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) temp
.val
[2], 2);
24557 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24560 __extension__
static __inline
void
24561 vst3_s8 (int8_t * __a
, int8x8x3_t val
)
24563 __builtin_aarch64_simd_ci __o
;
24565 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
24566 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
24567 temp
.val
[2] = vcombine_s8 (val
.val
[2], vcreate_s8 (INT64_C (0)));
24568 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24569 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24570 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24571 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24574 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24575 vst3_p8 (poly8_t
* __a
, poly8x8x3_t val
)
24577 __builtin_aarch64_simd_ci __o
;
24579 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
24580 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
24581 temp
.val
[2] = vcombine_p8 (val
.val
[2], vcreate_p8 (UINT64_C (0)));
24582 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24583 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24584 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24585 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24588 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24589 vst3_s16 (int16_t * __a
, int16x4x3_t val
)
24591 __builtin_aarch64_simd_ci __o
;
24593 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
24594 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
24595 temp
.val
[2] = vcombine_s16 (val
.val
[2], vcreate_s16 (INT64_C (0)));
24596 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24597 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24598 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24599 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24602 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24603 vst3_p16 (poly16_t
* __a
, poly16x4x3_t val
)
24605 __builtin_aarch64_simd_ci __o
;
24607 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
24608 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
24609 temp
.val
[2] = vcombine_p16 (val
.val
[2], vcreate_p16 (UINT64_C (0)));
24610 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24611 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24612 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24613 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24616 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24617 vst3_s32 (int32_t * __a
, int32x2x3_t val
)
24619 __builtin_aarch64_simd_ci __o
;
24621 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
24622 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
24623 temp
.val
[2] = vcombine_s32 (val
.val
[2], vcreate_s32 (INT64_C (0)));
24624 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24625 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24626 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24627 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24630 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24631 vst3_u8 (uint8_t * __a
, uint8x8x3_t val
)
24633 __builtin_aarch64_simd_ci __o
;
24635 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
24636 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
24637 temp
.val
[2] = vcombine_u8 (val
.val
[2], vcreate_u8 (UINT64_C (0)));
24638 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24639 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24640 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24641 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24644 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24645 vst3_u16 (uint16_t * __a
, uint16x4x3_t val
)
24647 __builtin_aarch64_simd_ci __o
;
24649 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
24650 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
24651 temp
.val
[2] = vcombine_u16 (val
.val
[2], vcreate_u16 (UINT64_C (0)));
24652 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24653 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24654 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24655 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24658 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24659 vst3_u32 (uint32_t * __a
, uint32x2x3_t val
)
24661 __builtin_aarch64_simd_ci __o
;
24663 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
24664 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
24665 temp
.val
[2] = vcombine_u32 (val
.val
[2], vcreate_u32 (UINT64_C (0)));
24666 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24667 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24668 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24669 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24672 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24673 vst3_f32 (float32_t
* __a
, float32x2x3_t val
)
24675 __builtin_aarch64_simd_ci __o
;
24676 float32x4x3_t temp
;
24677 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
24678 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
24679 temp
.val
[2] = vcombine_f32 (val
.val
[2], vcreate_f32 (UINT64_C (0)));
24680 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
24681 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
24682 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) temp
.val
[2], 2);
24683 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24686 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24687 vst3q_s8 (int8_t * __a
, int8x16x3_t val
)
24689 __builtin_aarch64_simd_ci __o
;
24690 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24691 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24692 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24693 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24696 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24697 vst3q_p8 (poly8_t
* __a
, poly8x16x3_t val
)
24699 __builtin_aarch64_simd_ci __o
;
24700 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24701 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24702 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24703 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24706 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24707 vst3q_s16 (int16_t * __a
, int16x8x3_t val
)
24709 __builtin_aarch64_simd_ci __o
;
24710 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24711 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24712 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24713 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24716 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24717 vst3q_p16 (poly16_t
* __a
, poly16x8x3_t val
)
24719 __builtin_aarch64_simd_ci __o
;
24720 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24721 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24722 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24723 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24726 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24727 vst3q_s32 (int32_t * __a
, int32x4x3_t val
)
24729 __builtin_aarch64_simd_ci __o
;
24730 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24731 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24732 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[2], 2);
24733 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24736 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24737 vst3q_s64 (int64_t * __a
, int64x2x3_t val
)
24739 __builtin_aarch64_simd_ci __o
;
24740 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24741 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24742 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[2], 2);
24743 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24746 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24747 vst3q_u8 (uint8_t * __a
, uint8x16x3_t val
)
24749 __builtin_aarch64_simd_ci __o
;
24750 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
24751 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
24752 __o
= __builtin_aarch64_set_qregciv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
24753 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24756 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24757 vst3q_u16 (uint16_t * __a
, uint16x8x3_t val
)
24759 __builtin_aarch64_simd_ci __o
;
24760 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
24761 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
24762 __o
= __builtin_aarch64_set_qregciv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
24763 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24766 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24767 vst3q_u32 (uint32_t * __a
, uint32x4x3_t val
)
24769 __builtin_aarch64_simd_ci __o
;
24770 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[0], 0);
24771 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[1], 1);
24772 __o
= __builtin_aarch64_set_qregciv4si (__o
, (int32x4_t
) val
.val
[2], 2);
24773 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24776 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24777 vst3q_u64 (uint64_t * __a
, uint64x2x3_t val
)
24779 __builtin_aarch64_simd_ci __o
;
24780 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[0], 0);
24781 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[1], 1);
24782 __o
= __builtin_aarch64_set_qregciv2di (__o
, (int64x2_t
) val
.val
[2], 2);
24783 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24786 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24787 vst3q_f32 (float32_t
* __a
, float32x4x3_t val
)
24789 __builtin_aarch64_simd_ci __o
;
24790 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
24791 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
24792 __o
= __builtin_aarch64_set_qregciv4sf (__o
, (float32x4_t
) val
.val
[2], 2);
24793 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24796 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24797 vst3q_f64 (float64_t
* __a
, float64x2x3_t val
)
24799 __builtin_aarch64_simd_ci __o
;
24800 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[0], 0);
24801 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[1], 1);
24802 __o
= __builtin_aarch64_set_qregciv2df (__o
, (float64x2_t
) val
.val
[2], 2);
24803 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24806 __extension__
static __inline
void
24807 vst4_s64 (int64_t * __a
, int64x1x4_t val
)
24809 __builtin_aarch64_simd_xi __o
;
24811 temp
.val
[0] = vcombine_s64 (val
.val
[0], vcreate_s64 (INT64_C (0)));
24812 temp
.val
[1] = vcombine_s64 (val
.val
[1], vcreate_s64 (INT64_C (0)));
24813 temp
.val
[2] = vcombine_s64 (val
.val
[2], vcreate_s64 (INT64_C (0)));
24814 temp
.val
[3] = vcombine_s64 (val
.val
[3], vcreate_s64 (INT64_C (0)));
24815 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24816 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24817 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24818 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[3], 3);
24819 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24822 __extension__
static __inline
void
24823 vst4_u64 (uint64_t * __a
, uint64x1x4_t val
)
24825 __builtin_aarch64_simd_xi __o
;
24827 temp
.val
[0] = vcombine_u64 (val
.val
[0], vcreate_u64 (UINT64_C (0)));
24828 temp
.val
[1] = vcombine_u64 (val
.val
[1], vcreate_u64 (UINT64_C (0)));
24829 temp
.val
[2] = vcombine_u64 (val
.val
[2], vcreate_u64 (UINT64_C (0)));
24830 temp
.val
[3] = vcombine_u64 (val
.val
[3], vcreate_u64 (UINT64_C (0)));
24831 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[0], 0);
24832 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[1], 1);
24833 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[2], 2);
24834 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) temp
.val
[3], 3);
24835 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di
*) __a
, __o
);
24838 __extension__
static __inline
void
24839 vst4_f64 (float64_t
* __a
, float64x1x4_t val
)
24841 __builtin_aarch64_simd_xi __o
;
24842 float64x2x4_t temp
;
24843 temp
.val
[0] = vcombine_f64 (val
.val
[0], vcreate_f64 (UINT64_C (0)));
24844 temp
.val
[1] = vcombine_f64 (val
.val
[1], vcreate_f64 (UINT64_C (0)));
24845 temp
.val
[2] = vcombine_f64 (val
.val
[2], vcreate_f64 (UINT64_C (0)));
24846 temp
.val
[3] = vcombine_f64 (val
.val
[3], vcreate_f64 (UINT64_C (0)));
24847 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[0], 0);
24848 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[1], 1);
24849 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[2], 2);
24850 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) temp
.val
[3], 3);
24851 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df
*) __a
, __o
);
24854 __extension__
static __inline
void
24855 vst4_s8 (int8_t * __a
, int8x8x4_t val
)
24857 __builtin_aarch64_simd_xi __o
;
24859 temp
.val
[0] = vcombine_s8 (val
.val
[0], vcreate_s8 (INT64_C (0)));
24860 temp
.val
[1] = vcombine_s8 (val
.val
[1], vcreate_s8 (INT64_C (0)));
24861 temp
.val
[2] = vcombine_s8 (val
.val
[2], vcreate_s8 (INT64_C (0)));
24862 temp
.val
[3] = vcombine_s8 (val
.val
[3], vcreate_s8 (INT64_C (0)));
24863 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24864 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24865 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24866 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
24867 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24870 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24871 vst4_p8 (poly8_t
* __a
, poly8x8x4_t val
)
24873 __builtin_aarch64_simd_xi __o
;
24875 temp
.val
[0] = vcombine_p8 (val
.val
[0], vcreate_p8 (UINT64_C (0)));
24876 temp
.val
[1] = vcombine_p8 (val
.val
[1], vcreate_p8 (UINT64_C (0)));
24877 temp
.val
[2] = vcombine_p8 (val
.val
[2], vcreate_p8 (UINT64_C (0)));
24878 temp
.val
[3] = vcombine_p8 (val
.val
[3], vcreate_p8 (UINT64_C (0)));
24879 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24880 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24881 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24882 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
24883 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24886 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24887 vst4_s16 (int16_t * __a
, int16x4x4_t val
)
24889 __builtin_aarch64_simd_xi __o
;
24891 temp
.val
[0] = vcombine_s16 (val
.val
[0], vcreate_s16 (INT64_C (0)));
24892 temp
.val
[1] = vcombine_s16 (val
.val
[1], vcreate_s16 (INT64_C (0)));
24893 temp
.val
[2] = vcombine_s16 (val
.val
[2], vcreate_s16 (INT64_C (0)));
24894 temp
.val
[3] = vcombine_s16 (val
.val
[3], vcreate_s16 (INT64_C (0)));
24895 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24896 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24897 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24898 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
24899 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24902 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24903 vst4_p16 (poly16_t
* __a
, poly16x4x4_t val
)
24905 __builtin_aarch64_simd_xi __o
;
24907 temp
.val
[0] = vcombine_p16 (val
.val
[0], vcreate_p16 (UINT64_C (0)));
24908 temp
.val
[1] = vcombine_p16 (val
.val
[1], vcreate_p16 (UINT64_C (0)));
24909 temp
.val
[2] = vcombine_p16 (val
.val
[2], vcreate_p16 (UINT64_C (0)));
24910 temp
.val
[3] = vcombine_p16 (val
.val
[3], vcreate_p16 (UINT64_C (0)));
24911 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24912 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24913 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24914 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
24915 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24918 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24919 vst4_s32 (int32_t * __a
, int32x2x4_t val
)
24921 __builtin_aarch64_simd_xi __o
;
24923 temp
.val
[0] = vcombine_s32 (val
.val
[0], vcreate_s32 (INT64_C (0)));
24924 temp
.val
[1] = vcombine_s32 (val
.val
[1], vcreate_s32 (INT64_C (0)));
24925 temp
.val
[2] = vcombine_s32 (val
.val
[2], vcreate_s32 (INT64_C (0)));
24926 temp
.val
[3] = vcombine_s32 (val
.val
[3], vcreate_s32 (INT64_C (0)));
24927 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24928 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24929 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24930 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[3], 3);
24931 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24934 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24935 vst4_u8 (uint8_t * __a
, uint8x8x4_t val
)
24937 __builtin_aarch64_simd_xi __o
;
24939 temp
.val
[0] = vcombine_u8 (val
.val
[0], vcreate_u8 (UINT64_C (0)));
24940 temp
.val
[1] = vcombine_u8 (val
.val
[1], vcreate_u8 (UINT64_C (0)));
24941 temp
.val
[2] = vcombine_u8 (val
.val
[2], vcreate_u8 (UINT64_C (0)));
24942 temp
.val
[3] = vcombine_u8 (val
.val
[3], vcreate_u8 (UINT64_C (0)));
24943 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[0], 0);
24944 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[1], 1);
24945 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[2], 2);
24946 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) temp
.val
[3], 3);
24947 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
24950 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24951 vst4_u16 (uint16_t * __a
, uint16x4x4_t val
)
24953 __builtin_aarch64_simd_xi __o
;
24955 temp
.val
[0] = vcombine_u16 (val
.val
[0], vcreate_u16 (UINT64_C (0)));
24956 temp
.val
[1] = vcombine_u16 (val
.val
[1], vcreate_u16 (UINT64_C (0)));
24957 temp
.val
[2] = vcombine_u16 (val
.val
[2], vcreate_u16 (UINT64_C (0)));
24958 temp
.val
[3] = vcombine_u16 (val
.val
[3], vcreate_u16 (UINT64_C (0)));
24959 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[0], 0);
24960 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[1], 1);
24961 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[2], 2);
24962 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) temp
.val
[3], 3);
24963 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
24966 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24967 vst4_u32 (uint32_t * __a
, uint32x2x4_t val
)
24969 __builtin_aarch64_simd_xi __o
;
24971 temp
.val
[0] = vcombine_u32 (val
.val
[0], vcreate_u32 (UINT64_C (0)));
24972 temp
.val
[1] = vcombine_u32 (val
.val
[1], vcreate_u32 (UINT64_C (0)));
24973 temp
.val
[2] = vcombine_u32 (val
.val
[2], vcreate_u32 (UINT64_C (0)));
24974 temp
.val
[3] = vcombine_u32 (val
.val
[3], vcreate_u32 (UINT64_C (0)));
24975 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[0], 0);
24976 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[1], 1);
24977 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[2], 2);
24978 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) temp
.val
[3], 3);
24979 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si
*) __a
, __o
);
24982 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24983 vst4_f32 (float32_t
* __a
, float32x2x4_t val
)
24985 __builtin_aarch64_simd_xi __o
;
24986 float32x4x4_t temp
;
24987 temp
.val
[0] = vcombine_f32 (val
.val
[0], vcreate_f32 (UINT64_C (0)));
24988 temp
.val
[1] = vcombine_f32 (val
.val
[1], vcreate_f32 (UINT64_C (0)));
24989 temp
.val
[2] = vcombine_f32 (val
.val
[2], vcreate_f32 (UINT64_C (0)));
24990 temp
.val
[3] = vcombine_f32 (val
.val
[3], vcreate_f32 (UINT64_C (0)));
24991 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[0], 0);
24992 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[1], 1);
24993 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[2], 2);
24994 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) temp
.val
[3], 3);
24995 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
24998 __extension__
static __inline
void __attribute__ ((__always_inline__
))
24999 vst4q_s8 (int8_t * __a
, int8x16x4_t val
)
25001 __builtin_aarch64_simd_xi __o
;
25002 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25003 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25004 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25005 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25006 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25009 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25010 vst4q_p8 (poly8_t
* __a
, poly8x16x4_t val
)
25012 __builtin_aarch64_simd_xi __o
;
25013 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25014 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25015 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25016 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25017 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25020 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25021 vst4q_s16 (int16_t * __a
, int16x8x4_t val
)
25023 __builtin_aarch64_simd_xi __o
;
25024 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25025 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25026 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25027 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25028 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25031 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25032 vst4q_p16 (poly16_t
* __a
, poly16x8x4_t val
)
25034 __builtin_aarch64_simd_xi __o
;
25035 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25036 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25037 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25038 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25039 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25042 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25043 vst4q_s32 (int32_t * __a
, int32x4x4_t val
)
25045 __builtin_aarch64_simd_xi __o
;
25046 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
25047 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
25048 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[2], 2);
25049 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[3], 3);
25050 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25053 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25054 vst4q_s64 (int64_t * __a
, int64x2x4_t val
)
25056 __builtin_aarch64_simd_xi __o
;
25057 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
25058 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
25059 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[2], 2);
25060 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[3], 3);
25061 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
25064 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25065 vst4q_u8 (uint8_t * __a
, uint8x16x4_t val
)
25067 __builtin_aarch64_simd_xi __o
;
25068 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[0], 0);
25069 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[1], 1);
25070 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[2], 2);
25071 __o
= __builtin_aarch64_set_qregxiv16qi (__o
, (int8x16_t
) val
.val
[3], 3);
25072 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi
*) __a
, __o
);
25075 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25076 vst4q_u16 (uint16_t * __a
, uint16x8x4_t val
)
25078 __builtin_aarch64_simd_xi __o
;
25079 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[0], 0);
25080 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[1], 1);
25081 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[2], 2);
25082 __o
= __builtin_aarch64_set_qregxiv8hi (__o
, (int16x8_t
) val
.val
[3], 3);
25083 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi
*) __a
, __o
);
25086 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25087 vst4q_u32 (uint32_t * __a
, uint32x4x4_t val
)
25089 __builtin_aarch64_simd_xi __o
;
25090 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[0], 0);
25091 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[1], 1);
25092 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[2], 2);
25093 __o
= __builtin_aarch64_set_qregxiv4si (__o
, (int32x4_t
) val
.val
[3], 3);
25094 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si
*) __a
, __o
);
25097 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25098 vst4q_u64 (uint64_t * __a
, uint64x2x4_t val
)
25100 __builtin_aarch64_simd_xi __o
;
25101 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[0], 0);
25102 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[1], 1);
25103 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[2], 2);
25104 __o
= __builtin_aarch64_set_qregxiv2di (__o
, (int64x2_t
) val
.val
[3], 3);
25105 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di
*) __a
, __o
);
25108 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25109 vst4q_f32 (float32_t
* __a
, float32x4x4_t val
)
25111 __builtin_aarch64_simd_xi __o
;
25112 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[0], 0);
25113 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[1], 1);
25114 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[2], 2);
25115 __o
= __builtin_aarch64_set_qregxiv4sf (__o
, (float32x4_t
) val
.val
[3], 3);
25116 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf
*) __a
, __o
);
25119 __extension__
static __inline
void __attribute__ ((__always_inline__
))
25120 vst4q_f64 (float64_t
* __a
, float64x2x4_t val
)
25122 __builtin_aarch64_simd_xi __o
;
25123 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[0], 0);
25124 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[1], 1);
25125 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[2], 2);
25126 __o
= __builtin_aarch64_set_qregxiv2df (__o
, (float64x2_t
) val
.val
[3], 3);
25127 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df
*) __a
, __o
);
25132 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25133 vsubd_s64 (int64x1_t __a
, int64x1_t __b
)
25138 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25139 vsubd_u64 (uint64x1_t __a
, uint64x1_t __b
)
25146 __extension__
static __inline float32x2x2_t
__attribute__ ((__always_inline__
))
25147 vtrn_f32 (float32x2_t a
, float32x2_t b
)
25149 return (float32x2x2_t
) {vtrn1_f32 (a
, b
), vtrn2_f32 (a
, b
)};
25152 __extension__
static __inline poly8x8x2_t
__attribute__ ((__always_inline__
))
25153 vtrn_p8 (poly8x8_t a
, poly8x8_t b
)
25155 return (poly8x8x2_t
) {vtrn1_p8 (a
, b
), vtrn2_p8 (a
, b
)};
25158 __extension__
static __inline poly16x4x2_t
__attribute__ ((__always_inline__
))
25159 vtrn_p16 (poly16x4_t a
, poly16x4_t b
)
25161 return (poly16x4x2_t
) {vtrn1_p16 (a
, b
), vtrn2_p16 (a
, b
)};
25164 __extension__
static __inline int8x8x2_t
__attribute__ ((__always_inline__
))
25165 vtrn_s8 (int8x8_t a
, int8x8_t b
)
25167 return (int8x8x2_t
) {vtrn1_s8 (a
, b
), vtrn2_s8 (a
, b
)};
25170 __extension__
static __inline int16x4x2_t
__attribute__ ((__always_inline__
))
25171 vtrn_s16 (int16x4_t a
, int16x4_t b
)
25173 return (int16x4x2_t
) {vtrn1_s16 (a
, b
), vtrn2_s16 (a
, b
)};
25176 __extension__
static __inline int32x2x2_t
__attribute__ ((__always_inline__
))
25177 vtrn_s32 (int32x2_t a
, int32x2_t b
)
25179 return (int32x2x2_t
) {vtrn1_s32 (a
, b
), vtrn2_s32 (a
, b
)};
25182 __extension__
static __inline uint8x8x2_t
__attribute__ ((__always_inline__
))
25183 vtrn_u8 (uint8x8_t a
, uint8x8_t b
)
25185 return (uint8x8x2_t
) {vtrn1_u8 (a
, b
), vtrn2_u8 (a
, b
)};
25188 __extension__
static __inline uint16x4x2_t
__attribute__ ((__always_inline__
))
25189 vtrn_u16 (uint16x4_t a
, uint16x4_t b
)
25191 return (uint16x4x2_t
) {vtrn1_u16 (a
, b
), vtrn2_u16 (a
, b
)};
25194 __extension__
static __inline uint32x2x2_t
__attribute__ ((__always_inline__
))
25195 vtrn_u32 (uint32x2_t a
, uint32x2_t b
)
25197 return (uint32x2x2_t
) {vtrn1_u32 (a
, b
), vtrn2_u32 (a
, b
)};
25200 __extension__
static __inline float32x4x2_t
__attribute__ ((__always_inline__
))
25201 vtrnq_f32 (float32x4_t a
, float32x4_t b
)
25203 return (float32x4x2_t
) {vtrn1q_f32 (a
, b
), vtrn2q_f32 (a
, b
)};
25206 __extension__
static __inline poly8x16x2_t
__attribute__ ((__always_inline__
))
25207 vtrnq_p8 (poly8x16_t a
, poly8x16_t b
)
25209 return (poly8x16x2_t
) {vtrn1q_p8 (a
, b
), vtrn2q_p8 (a
, b
)};
25212 __extension__
static __inline poly16x8x2_t
__attribute__ ((__always_inline__
))
25213 vtrnq_p16 (poly16x8_t a
, poly16x8_t b
)
25215 return (poly16x8x2_t
) {vtrn1q_p16 (a
, b
), vtrn2q_p16 (a
, b
)};
25218 __extension__
static __inline int8x16x2_t
__attribute__ ((__always_inline__
))
25219 vtrnq_s8 (int8x16_t a
, int8x16_t b
)
25221 return (int8x16x2_t
) {vtrn1q_s8 (a
, b
), vtrn2q_s8 (a
, b
)};
25224 __extension__
static __inline int16x8x2_t
__attribute__ ((__always_inline__
))
25225 vtrnq_s16 (int16x8_t a
, int16x8_t b
)
25227 return (int16x8x2_t
) {vtrn1q_s16 (a
, b
), vtrn2q_s16 (a
, b
)};
25230 __extension__
static __inline int32x4x2_t
__attribute__ ((__always_inline__
))
25231 vtrnq_s32 (int32x4_t a
, int32x4_t b
)
25233 return (int32x4x2_t
) {vtrn1q_s32 (a
, b
), vtrn2q_s32 (a
, b
)};
25236 __extension__
static __inline uint8x16x2_t
__attribute__ ((__always_inline__
))
25237 vtrnq_u8 (uint8x16_t a
, uint8x16_t b
)
25239 return (uint8x16x2_t
) {vtrn1q_u8 (a
, b
), vtrn2q_u8 (a
, b
)};
25242 __extension__
static __inline uint16x8x2_t
__attribute__ ((__always_inline__
))
25243 vtrnq_u16 (uint16x8_t a
, uint16x8_t b
)
25245 return (uint16x8x2_t
) {vtrn1q_u16 (a
, b
), vtrn2q_u16 (a
, b
)};
25248 __extension__
static __inline uint32x4x2_t
__attribute__ ((__always_inline__
))
25249 vtrnq_u32 (uint32x4_t a
, uint32x4_t b
)
25251 return (uint32x4x2_t
) {vtrn1q_u32 (a
, b
), vtrn2q_u32 (a
, b
)};
25256 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
25257 vtst_s8 (int8x8_t __a
, int8x8_t __b
)
25259 return (uint8x8_t
) __builtin_aarch64_cmtstv8qi (__a
, __b
);
25262 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
25263 vtst_s16 (int16x4_t __a
, int16x4_t __b
)
25265 return (uint16x4_t
) __builtin_aarch64_cmtstv4hi (__a
, __b
);
25268 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
25269 vtst_s32 (int32x2_t __a
, int32x2_t __b
)
25271 return (uint32x2_t
) __builtin_aarch64_cmtstv2si (__a
, __b
);
25274 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25275 vtst_s64 (int64x1_t __a
, int64x1_t __b
)
25277 return (uint64x1_t
) __builtin_aarch64_cmtstdi (__a
, __b
);
25280 __extension__
static __inline uint8x8_t
__attribute__ ((__always_inline__
))
25281 vtst_u8 (uint8x8_t __a
, uint8x8_t __b
)
25283 return (uint8x8_t
) __builtin_aarch64_cmtstv8qi ((int8x8_t
) __a
,
25287 __extension__
static __inline uint16x4_t
__attribute__ ((__always_inline__
))
25288 vtst_u16 (uint16x4_t __a
, uint16x4_t __b
)
25290 return (uint16x4_t
) __builtin_aarch64_cmtstv4hi ((int16x4_t
) __a
,
25294 __extension__
static __inline uint32x2_t
__attribute__ ((__always_inline__
))
25295 vtst_u32 (uint32x2_t __a
, uint32x2_t __b
)
25297 return (uint32x2_t
) __builtin_aarch64_cmtstv2si ((int32x2_t
) __a
,
25301 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25302 vtst_u64 (uint64x1_t __a
, uint64x1_t __b
)
25304 return (uint64x1_t
) __builtin_aarch64_cmtstdi ((int64x1_t
) __a
,
25308 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
25309 vtstq_s8 (int8x16_t __a
, int8x16_t __b
)
25311 return (uint8x16_t
) __builtin_aarch64_cmtstv16qi (__a
, __b
);
25314 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
25315 vtstq_s16 (int16x8_t __a
, int16x8_t __b
)
25317 return (uint16x8_t
) __builtin_aarch64_cmtstv8hi (__a
, __b
);
25320 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
25321 vtstq_s32 (int32x4_t __a
, int32x4_t __b
)
25323 return (uint32x4_t
) __builtin_aarch64_cmtstv4si (__a
, __b
);
25326 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
25327 vtstq_s64 (int64x2_t __a
, int64x2_t __b
)
25329 return (uint64x2_t
) __builtin_aarch64_cmtstv2di (__a
, __b
);
25332 __extension__
static __inline uint8x16_t
__attribute__ ((__always_inline__
))
25333 vtstq_u8 (uint8x16_t __a
, uint8x16_t __b
)
25335 return (uint8x16_t
) __builtin_aarch64_cmtstv16qi ((int8x16_t
) __a
,
25339 __extension__
static __inline uint16x8_t
__attribute__ ((__always_inline__
))
25340 vtstq_u16 (uint16x8_t __a
, uint16x8_t __b
)
25342 return (uint16x8_t
) __builtin_aarch64_cmtstv8hi ((int16x8_t
) __a
,
25346 __extension__
static __inline uint32x4_t
__attribute__ ((__always_inline__
))
25347 vtstq_u32 (uint32x4_t __a
, uint32x4_t __b
)
25349 return (uint32x4_t
) __builtin_aarch64_cmtstv4si ((int32x4_t
) __a
,
25353 __extension__
static __inline uint64x2_t
__attribute__ ((__always_inline__
))
25354 vtstq_u64 (uint64x2_t __a
, uint64x2_t __b
)
25356 return (uint64x2_t
) __builtin_aarch64_cmtstv2di ((int64x2_t
) __a
,
25360 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25361 vtstd_s64 (int64x1_t __a
, int64x1_t __b
)
25363 return (uint64x1_t
) __builtin_aarch64_cmtstdi (__a
, __b
);
25366 __extension__
static __inline uint64x1_t
__attribute__ ((__always_inline__
))
25367 vtstd_u64 (uint64x1_t __a
, uint64x1_t __b
)
25369 return (uint64x1_t
) __builtin_aarch64_cmtstdi ((int64x1_t
) __a
,
25375 __extension__
static __inline int8x8_t
__attribute__ ((__always_inline__
))
25376 vuqadd_s8 (int8x8_t __a
, uint8x8_t __b
)
25378 return (int8x8_t
) __builtin_aarch64_suqaddv8qi (__a
, (int8x8_t
) __b
);
25381 __extension__
static __inline int16x4_t
__attribute__ ((__always_inline__
))
25382 vuqadd_s16 (int16x4_t __a
, uint16x4_t __b
)
25384 return (int16x4_t
) __builtin_aarch64_suqaddv4hi (__a
, (int16x4_t
) __b
);
25387 __extension__
static __inline int32x2_t
__attribute__ ((__always_inline__
))
25388 vuqadd_s32 (int32x2_t __a
, uint32x2_t __b
)
25390 return (int32x2_t
) __builtin_aarch64_suqaddv2si (__a
, (int32x2_t
) __b
);
25393 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25394 vuqadd_s64 (int64x1_t __a
, uint64x1_t __b
)
25396 return (int64x1_t
) __builtin_aarch64_suqadddi (__a
, (int64x1_t
) __b
);
25399 __extension__
static __inline int8x16_t
__attribute__ ((__always_inline__
))
25400 vuqaddq_s8 (int8x16_t __a
, uint8x16_t __b
)
25402 return (int8x16_t
) __builtin_aarch64_suqaddv16qi (__a
, (int8x16_t
) __b
);
25405 __extension__
static __inline int16x8_t
__attribute__ ((__always_inline__
))
25406 vuqaddq_s16 (int16x8_t __a
, uint16x8_t __b
)
25408 return (int16x8_t
) __builtin_aarch64_suqaddv8hi (__a
, (int16x8_t
) __b
);
25411 __extension__
static __inline int32x4_t
__attribute__ ((__always_inline__
))
25412 vuqaddq_s32 (int32x4_t __a
, uint32x4_t __b
)
25414 return (int32x4_t
) __builtin_aarch64_suqaddv4si (__a
, (int32x4_t
) __b
);
25417 __extension__
static __inline int64x2_t
__attribute__ ((__always_inline__
))
25418 vuqaddq_s64 (int64x2_t __a
, uint64x2_t __b
)
25420 return (int64x2_t
) __builtin_aarch64_suqaddv2di (__a
, (int64x2_t
) __b
);
25423 __extension__
static __inline int8x1_t
__attribute__ ((__always_inline__
))
25424 vuqaddb_s8 (int8x1_t __a
, uint8x1_t __b
)
25426 return (int8x1_t
) __builtin_aarch64_suqaddqi (__a
, (int8x1_t
) __b
);
25429 __extension__
static __inline int16x1_t
__attribute__ ((__always_inline__
))
25430 vuqaddh_s16 (int16x1_t __a
, uint16x1_t __b
)
25432 return (int16x1_t
) __builtin_aarch64_suqaddhi (__a
, (int16x1_t
) __b
);
25435 __extension__
static __inline int32x1_t
__attribute__ ((__always_inline__
))
25436 vuqadds_s32 (int32x1_t __a
, uint32x1_t __b
)
25438 return (int32x1_t
) __builtin_aarch64_suqaddsi (__a
, (int32x1_t
) __b
);
25441 __extension__
static __inline int64x1_t
__attribute__ ((__always_inline__
))
25442 vuqaddd_s64 (int64x1_t __a
, uint64x1_t __b
)
25444 return (int64x1_t
) __builtin_aarch64_suqadddi (__a
, (int64x1_t
) __b
);
/* Define the always-inline function v<op><Q>_<funcsuffix> (a, b), which
   returns a RETTYPE built from v<op>1<Q>_<funcsuffix> (a, b) and
   v<op>2<Q>_<funcsuffix> (a, b).  The braces of the generated function
   body were missing from this copy and the final line ended in a
   continuation backslash; both are repaired here.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)             \
  __extension__ static __inline rettype                                 \
  __attribute__ ((__always_inline__))                                   \
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)                  \
  {                                                                     \
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),      \
                      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};     \
  }
/* Expand __DEFINTERLEAVE for OP once per element type: nine invocations
   with an empty Q argument followed by nine with Q = q.  */
#define __INTERLEAVE_LIST(op)                                   \
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)        \
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)             \
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)          \
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)               \
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)            \
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)            \
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)             \
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)          \
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)          \
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)      \
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)         \
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)        \
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)           \
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)          \
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)          \
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)         \
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)        \
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25478 __INTERLEAVE_LIST (uzp
)
25482 __INTERLEAVE_LIST (zip
)
25484 #undef __INTERLEAVE_LIST
25485 #undef __DEFINTERLEAVE
25487 /* End of optimal implementations in approved order. */