[AArch64] Rewrite vabs<q>_s<8,16,32,64> AdvSIMD intrinsics to fold to tree
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob99cf123e29ed0ff220ff6d9f72e556437797e06a
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 typedef __builtin_aarch64_simd_qi int8x8_t
33 __attribute__ ((__vector_size__ (8)));
34 typedef __builtin_aarch64_simd_hi int16x4_t
35 __attribute__ ((__vector_size__ (8)));
36 typedef __builtin_aarch64_simd_si int32x2_t
37 __attribute__ ((__vector_size__ (8)));
38 typedef int64_t int64x1_t;
39 typedef int32_t int32x1_t;
40 typedef int16_t int16x1_t;
41 typedef int8_t int8x1_t;
42 typedef double float64x1_t;
43 typedef __builtin_aarch64_simd_sf float32x2_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_poly8 poly8x8_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly16 poly16x4_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_uqi uint8x8_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uhi uint16x4_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_usi uint32x2_t
54 __attribute__ ((__vector_size__ (8)));
55 typedef uint64_t uint64x1_t;
56 typedef uint32_t uint32x1_t;
57 typedef uint16_t uint16x1_t;
58 typedef uint8_t uint8x1_t;
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_uqi uint8x16_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uhi uint16x8_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_usi uint32x4_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_udi uint64x2_t
82 __attribute__ ((__vector_size__ (16)));
84 typedef float float32_t;
85 typedef double float64_t;
86 typedef __builtin_aarch64_simd_poly8 poly8_t;
87 typedef __builtin_aarch64_simd_poly16 poly16_t;
89 typedef struct int8x8x2_t
91 int8x8_t val[2];
92 } int8x8x2_t;
94 typedef struct int8x16x2_t
96 int8x16_t val[2];
97 } int8x16x2_t;
99 typedef struct int16x4x2_t
101 int16x4_t val[2];
102 } int16x4x2_t;
104 typedef struct int16x8x2_t
106 int16x8_t val[2];
107 } int16x8x2_t;
109 typedef struct int32x2x2_t
111 int32x2_t val[2];
112 } int32x2x2_t;
114 typedef struct int32x4x2_t
116 int32x4_t val[2];
117 } int32x4x2_t;
119 typedef struct int64x1x2_t
121 int64x1_t val[2];
122 } int64x1x2_t;
124 typedef struct int64x2x2_t
126 int64x2_t val[2];
127 } int64x2x2_t;
129 typedef struct uint8x8x2_t
131 uint8x8_t val[2];
132 } uint8x8x2_t;
134 typedef struct uint8x16x2_t
136 uint8x16_t val[2];
137 } uint8x16x2_t;
139 typedef struct uint16x4x2_t
141 uint16x4_t val[2];
142 } uint16x4x2_t;
144 typedef struct uint16x8x2_t
146 uint16x8_t val[2];
147 } uint16x8x2_t;
149 typedef struct uint32x2x2_t
151 uint32x2_t val[2];
152 } uint32x2x2_t;
154 typedef struct uint32x4x2_t
156 uint32x4_t val[2];
157 } uint32x4x2_t;
159 typedef struct uint64x1x2_t
161 uint64x1_t val[2];
162 } uint64x1x2_t;
164 typedef struct uint64x2x2_t
166 uint64x2_t val[2];
167 } uint64x2x2_t;
169 typedef struct float32x2x2_t
171 float32x2_t val[2];
172 } float32x2x2_t;
174 typedef struct float32x4x2_t
176 float32x4_t val[2];
177 } float32x4x2_t;
179 typedef struct float64x2x2_t
181 float64x2_t val[2];
182 } float64x2x2_t;
184 typedef struct float64x1x2_t
186 float64x1_t val[2];
187 } float64x1x2_t;
189 typedef struct poly8x8x2_t
191 poly8x8_t val[2];
192 } poly8x8x2_t;
194 typedef struct poly8x16x2_t
196 poly8x16_t val[2];
197 } poly8x16x2_t;
199 typedef struct poly16x4x2_t
201 poly16x4_t val[2];
202 } poly16x4x2_t;
204 typedef struct poly16x8x2_t
206 poly16x8_t val[2];
207 } poly16x8x2_t;
209 typedef struct int8x8x3_t
211 int8x8_t val[3];
212 } int8x8x3_t;
214 typedef struct int8x16x3_t
216 int8x16_t val[3];
217 } int8x16x3_t;
219 typedef struct int16x4x3_t
221 int16x4_t val[3];
222 } int16x4x3_t;
224 typedef struct int16x8x3_t
226 int16x8_t val[3];
227 } int16x8x3_t;
229 typedef struct int32x2x3_t
231 int32x2_t val[3];
232 } int32x2x3_t;
234 typedef struct int32x4x3_t
236 int32x4_t val[3];
237 } int32x4x3_t;
239 typedef struct int64x1x3_t
241 int64x1_t val[3];
242 } int64x1x3_t;
244 typedef struct int64x2x3_t
246 int64x2_t val[3];
247 } int64x2x3_t;
249 typedef struct uint8x8x3_t
251 uint8x8_t val[3];
252 } uint8x8x3_t;
254 typedef struct uint8x16x3_t
256 uint8x16_t val[3];
257 } uint8x16x3_t;
259 typedef struct uint16x4x3_t
261 uint16x4_t val[3];
262 } uint16x4x3_t;
264 typedef struct uint16x8x3_t
266 uint16x8_t val[3];
267 } uint16x8x3_t;
269 typedef struct uint32x2x3_t
271 uint32x2_t val[3];
272 } uint32x2x3_t;
274 typedef struct uint32x4x3_t
276 uint32x4_t val[3];
277 } uint32x4x3_t;
279 typedef struct uint64x1x3_t
281 uint64x1_t val[3];
282 } uint64x1x3_t;
284 typedef struct uint64x2x3_t
286 uint64x2_t val[3];
287 } uint64x2x3_t;
289 typedef struct float32x2x3_t
291 float32x2_t val[3];
292 } float32x2x3_t;
294 typedef struct float32x4x3_t
296 float32x4_t val[3];
297 } float32x4x3_t;
299 typedef struct float64x2x3_t
301 float64x2_t val[3];
302 } float64x2x3_t;
304 typedef struct float64x1x3_t
306 float64x1_t val[3];
307 } float64x1x3_t;
309 typedef struct poly8x8x3_t
311 poly8x8_t val[3];
312 } poly8x8x3_t;
314 typedef struct poly8x16x3_t
316 poly8x16_t val[3];
317 } poly8x16x3_t;
319 typedef struct poly16x4x3_t
321 poly16x4_t val[3];
322 } poly16x4x3_t;
324 typedef struct poly16x8x3_t
326 poly16x8_t val[3];
327 } poly16x8x3_t;
329 typedef struct int8x8x4_t
331 int8x8_t val[4];
332 } int8x8x4_t;
334 typedef struct int8x16x4_t
336 int8x16_t val[4];
337 } int8x16x4_t;
339 typedef struct int16x4x4_t
341 int16x4_t val[4];
342 } int16x4x4_t;
344 typedef struct int16x8x4_t
346 int16x8_t val[4];
347 } int16x8x4_t;
349 typedef struct int32x2x4_t
351 int32x2_t val[4];
352 } int32x2x4_t;
354 typedef struct int32x4x4_t
356 int32x4_t val[4];
357 } int32x4x4_t;
359 typedef struct int64x1x4_t
361 int64x1_t val[4];
362 } int64x1x4_t;
364 typedef struct int64x2x4_t
366 int64x2_t val[4];
367 } int64x2x4_t;
369 typedef struct uint8x8x4_t
371 uint8x8_t val[4];
372 } uint8x8x4_t;
374 typedef struct uint8x16x4_t
376 uint8x16_t val[4];
377 } uint8x16x4_t;
379 typedef struct uint16x4x4_t
381 uint16x4_t val[4];
382 } uint16x4x4_t;
384 typedef struct uint16x8x4_t
386 uint16x8_t val[4];
387 } uint16x8x4_t;
389 typedef struct uint32x2x4_t
391 uint32x2_t val[4];
392 } uint32x2x4_t;
394 typedef struct uint32x4x4_t
396 uint32x4_t val[4];
397 } uint32x4x4_t;
399 typedef struct uint64x1x4_t
401 uint64x1_t val[4];
402 } uint64x1x4_t;
404 typedef struct uint64x2x4_t
406 uint64x2_t val[4];
407 } uint64x2x4_t;
409 typedef struct float32x2x4_t
411 float32x2_t val[4];
412 } float32x2x4_t;
414 typedef struct float32x4x4_t
416 float32x4_t val[4];
417 } float32x4x4_t;
419 typedef struct float64x2x4_t
421 float64x2_t val[4];
422 } float64x2x4_t;
424 typedef struct float64x1x4_t
426 float64x1_t val[4];
427 } float64x1x4_t;
429 typedef struct poly8x8x4_t
431 poly8x8_t val[4];
432 } poly8x8x4_t;
434 typedef struct poly8x16x4_t
436 poly8x16_t val[4];
437 } poly8x16x4_t;
439 typedef struct poly16x4x4_t
441 poly16x4_t val[4];
442 } poly16x4x4_t;
444 typedef struct poly16x8x4_t
446 poly16x8_t val[4];
447 } poly16x8x4_t;
450 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
451 vadd_s8 (int8x8_t __a, int8x8_t __b)
453 return __a + __b;
456 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
457 vadd_s16 (int16x4_t __a, int16x4_t __b)
459 return __a + __b;
462 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
463 vadd_s32 (int32x2_t __a, int32x2_t __b)
465 return __a + __b;
468 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
469 vadd_f32 (float32x2_t __a, float32x2_t __b)
471 return __a + __b;
474 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
475 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
477 return __a + __b;
480 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
481 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
483 return __a + __b;
486 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
487 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
489 return __a + __b;
492 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
493 vadd_s64 (int64x1_t __a, int64x1_t __b)
495 return __a + __b;
498 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
499 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
501 return __a + __b;
504 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
505 vaddq_s8 (int8x16_t __a, int8x16_t __b)
507 return __a + __b;
510 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
511 vaddq_s16 (int16x8_t __a, int16x8_t __b)
513 return __a + __b;
516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
517 vaddq_s32 (int32x4_t __a, int32x4_t __b)
519 return __a + __b;
522 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
523 vaddq_s64 (int64x2_t __a, int64x2_t __b)
525 return __a + __b;
528 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
529 vaddq_f32 (float32x4_t __a, float32x4_t __b)
531 return __a + __b;
534 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
535 vaddq_f64 (float64x2_t __a, float64x2_t __b)
537 return __a + __b;
540 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
541 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
543 return __a + __b;
546 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
547 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
549 return __a + __b;
552 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
553 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
555 return __a + __b;
558 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
559 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
561 return __a + __b;
564 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
565 vaddl_s8 (int8x8_t __a, int8x8_t __b)
567 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
570 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
571 vaddl_s16 (int16x4_t __a, int16x4_t __b)
573 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
576 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
577 vaddl_s32 (int32x2_t __a, int32x2_t __b)
579 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
582 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
583 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
585 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
586 (int8x8_t) __b);
589 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
590 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
592 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
593 (int16x4_t) __b);
596 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
597 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
599 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
600 (int32x2_t) __b);
603 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
604 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
606 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
609 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
610 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
612 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
615 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
616 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
618 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
621 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
622 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
624 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
625 (int8x16_t) __b);
628 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
629 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
631 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
632 (int16x8_t) __b);
635 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
636 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
638 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
639 (int32x4_t) __b);
642 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
643 vaddw_s8 (int16x8_t __a, int8x8_t __b)
645 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
648 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
649 vaddw_s16 (int32x4_t __a, int16x4_t __b)
651 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
654 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
655 vaddw_s32 (int64x2_t __a, int32x2_t __b)
657 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
660 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
661 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
663 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
664 (int8x8_t) __b);
667 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
668 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
670 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
671 (int16x4_t) __b);
674 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
675 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
677 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
678 (int32x2_t) __b);
681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
682 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
684 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
688 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
690 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
693 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
694 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
696 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
699 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
700 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
702 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
703 (int8x16_t) __b);
706 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
707 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
709 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
710 (int16x8_t) __b);
713 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
714 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
716 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
717 (int32x4_t) __b);
720 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
721 vhadd_s8 (int8x8_t __a, int8x8_t __b)
723 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
726 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
727 vhadd_s16 (int16x4_t __a, int16x4_t __b)
729 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
732 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
733 vhadd_s32 (int32x2_t __a, int32x2_t __b)
735 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
739 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
741 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
742 (int8x8_t) __b);
745 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
746 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
748 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
749 (int16x4_t) __b);
752 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
753 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
755 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
756 (int32x2_t) __b);
759 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
760 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
762 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
765 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
766 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
768 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
771 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
772 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
774 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
777 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
778 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
780 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
781 (int8x16_t) __b);
784 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
785 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
787 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
788 (int16x8_t) __b);
791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
792 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
794 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
795 (int32x4_t) __b);
798 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
799 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
801 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
804 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
805 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
807 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
810 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
811 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
813 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
816 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
817 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
819 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
820 (int8x8_t) __b);
823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
824 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
826 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
827 (int16x4_t) __b);
830 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
831 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
833 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
834 (int32x2_t) __b);
837 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
838 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
840 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
843 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
844 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
846 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
849 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
850 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
852 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
855 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
856 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
858 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
859 (int8x16_t) __b);
862 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
863 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
865 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
866 (int16x8_t) __b);
869 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
870 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
872 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
873 (int32x4_t) __b);
876 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
877 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
879 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
882 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
883 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
885 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
888 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
889 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
891 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
894 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
895 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
897 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
898 (int16x8_t) __b);
901 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
902 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
904 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
905 (int32x4_t) __b);
908 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
909 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
911 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
912 (int64x2_t) __b);
915 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
916 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
918 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
921 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
922 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
924 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
927 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
928 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
930 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
934 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
936 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
937 (int16x8_t) __b);
940 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
941 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
943 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
944 (int32x4_t) __b);
947 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
948 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
950 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
951 (int64x2_t) __b);
954 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
955 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
957 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
960 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
961 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
963 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
966 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
967 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
969 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
972 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
973 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
975 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
976 (int16x8_t) __b,
977 (int16x8_t) __c);
980 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
981 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
983 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
984 (int32x4_t) __b,
985 (int32x4_t) __c);
988 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
989 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
991 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
992 (int64x2_t) __b,
993 (int64x2_t) __c);
996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
997 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
999 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1002 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1003 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1005 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1008 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1009 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1011 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1014 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1015 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1017 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1018 (int16x8_t) __b,
1019 (int16x8_t) __c);
1022 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1023 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1025 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1026 (int32x4_t) __b,
1027 (int32x4_t) __c);
1030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1031 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1033 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1034 (int64x2_t) __b,
1035 (int64x2_t) __c);
1038 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1039 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1041 return __a / __b;
1044 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1045 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1047 return __a / __b;
1050 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1051 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1053 return __a / __b;
1056 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1057 vmul_s8 (int8x8_t __a, int8x8_t __b)
1059 return __a * __b;
1062 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1063 vmul_s16 (int16x4_t __a, int16x4_t __b)
1065 return __a * __b;
1068 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1069 vmul_s32 (int32x2_t __a, int32x2_t __b)
1071 return __a * __b;
1074 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1075 vmul_f32 (float32x2_t __a, float32x2_t __b)
1077 return __a * __b;
1080 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1081 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1083 return __a * __b;
1086 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1087 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1089 return __a * __b;
1092 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1093 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1095 return __a * __b;
1098 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1099 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1101 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1102 (int8x8_t) __b);
1105 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1106 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1108 return __a * __b;
1111 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1112 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1114 return __a * __b;
1117 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1118 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1120 return __a * __b;
1123 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1124 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1126 return __a * __b;
1129 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1130 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1132 return __a * __b;
1135 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1136 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1138 return __a * __b;
1141 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1142 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1144 return __a * __b;
1147 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1148 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1150 return __a * __b;
1153 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1154 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1156 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1157 (int8x16_t) __b);
1160 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1161 vand_s8 (int8x8_t __a, int8x8_t __b)
1163 return __a & __b;
1166 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1167 vand_s16 (int16x4_t __a, int16x4_t __b)
1169 return __a & __b;
1172 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1173 vand_s32 (int32x2_t __a, int32x2_t __b)
1175 return __a & __b;
1178 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1179 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1181 return __a & __b;
1184 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1185 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1187 return __a & __b;
1190 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1191 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1193 return __a & __b;
1196 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1197 vand_s64 (int64x1_t __a, int64x1_t __b)
1199 return __a & __b;
1202 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1203 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1205 return __a & __b;
1208 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1209 vandq_s8 (int8x16_t __a, int8x16_t __b)
1211 return __a & __b;
1214 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1215 vandq_s16 (int16x8_t __a, int16x8_t __b)
1217 return __a & __b;
1220 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1221 vandq_s32 (int32x4_t __a, int32x4_t __b)
1223 return __a & __b;
1226 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1227 vandq_s64 (int64x2_t __a, int64x2_t __b)
1229 return __a & __b;
1232 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1233 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1235 return __a & __b;
1238 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1239 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1241 return __a & __b;
1244 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1245 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1247 return __a & __b;
1250 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1251 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1253 return __a & __b;
1256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1257 vorr_s8 (int8x8_t __a, int8x8_t __b)
1259 return __a | __b;
1262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1263 vorr_s16 (int16x4_t __a, int16x4_t __b)
1265 return __a | __b;
1268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1269 vorr_s32 (int32x2_t __a, int32x2_t __b)
1271 return __a | __b;
1274 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1275 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1277 return __a | __b;
1280 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1281 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1283 return __a | __b;
1286 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1287 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1289 return __a | __b;
1292 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1293 vorr_s64 (int64x1_t __a, int64x1_t __b)
1295 return __a | __b;
1298 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1299 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1301 return __a | __b;
1304 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1305 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1307 return __a | __b;
1310 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1311 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1313 return __a | __b;
1316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1317 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1319 return __a | __b;
1322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1323 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1325 return __a | __b;
1328 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1329 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1331 return __a | __b;
1334 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1335 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1337 return __a | __b;
1340 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1341 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1343 return __a | __b;
1346 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1347 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1349 return __a | __b;
1352 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1353 veor_s8 (int8x8_t __a, int8x8_t __b)
1355 return __a ^ __b;
1358 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1359 veor_s16 (int16x4_t __a, int16x4_t __b)
1361 return __a ^ __b;
1364 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1365 veor_s32 (int32x2_t __a, int32x2_t __b)
1367 return __a ^ __b;
1370 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1371 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1373 return __a ^ __b;
1376 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1377 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1379 return __a ^ __b;
1382 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1383 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1385 return __a ^ __b;
1388 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1389 veor_s64 (int64x1_t __a, int64x1_t __b)
1391 return __a ^ __b;
1394 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1395 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1397 return __a ^ __b;
1400 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1401 veorq_s8 (int8x16_t __a, int8x16_t __b)
1403 return __a ^ __b;
1406 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1407 veorq_s16 (int16x8_t __a, int16x8_t __b)
1409 return __a ^ __b;
1412 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1413 veorq_s32 (int32x4_t __a, int32x4_t __b)
1415 return __a ^ __b;
1418 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1419 veorq_s64 (int64x2_t __a, int64x2_t __b)
1421 return __a ^ __b;
1424 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1425 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1427 return __a ^ __b;
1430 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1431 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1433 return __a ^ __b;
1436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1437 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1439 return __a ^ __b;
1442 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1443 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1445 return __a ^ __b;
1448 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1449 vbic_s8 (int8x8_t __a, int8x8_t __b)
1451 return __a & ~__b;
1454 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1455 vbic_s16 (int16x4_t __a, int16x4_t __b)
1457 return __a & ~__b;
1460 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1461 vbic_s32 (int32x2_t __a, int32x2_t __b)
1463 return __a & ~__b;
1466 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1467 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1469 return __a & ~__b;
1472 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1473 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1475 return __a & ~__b;
1478 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1479 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1481 return __a & ~__b;
1484 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1485 vbic_s64 (int64x1_t __a, int64x1_t __b)
1487 return __a & ~__b;
1490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1491 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1493 return __a & ~__b;
1496 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1497 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1499 return __a & ~__b;
1502 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1503 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1505 return __a & ~__b;
1508 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1509 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1511 return __a & ~__b;
1514 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1515 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1517 return __a & ~__b;
1520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1521 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1523 return __a & ~__b;
1526 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1527 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1529 return __a & ~__b;
1532 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1533 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1535 return __a & ~__b;
1538 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1539 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1541 return __a & ~__b;
1544 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1545 vorn_s8 (int8x8_t __a, int8x8_t __b)
1547 return __a | ~__b;
1550 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1551 vorn_s16 (int16x4_t __a, int16x4_t __b)
1553 return __a | ~__b;
1556 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1557 vorn_s32 (int32x2_t __a, int32x2_t __b)
1559 return __a | ~__b;
1562 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1563 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1565 return __a | ~__b;
1568 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1569 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1571 return __a | ~__b;
1574 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1575 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1577 return __a | ~__b;
1580 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1581 vorn_s64 (int64x1_t __a, int64x1_t __b)
1583 return __a | ~__b;
1586 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1587 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1589 return __a | ~__b;
1592 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1593 vornq_s8 (int8x16_t __a, int8x16_t __b)
1595 return __a | ~__b;
1598 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1599 vornq_s16 (int16x8_t __a, int16x8_t __b)
1601 return __a | ~__b;
1604 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1605 vornq_s32 (int32x4_t __a, int32x4_t __b)
1607 return __a | ~__b;
1610 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1611 vornq_s64 (int64x2_t __a, int64x2_t __b)
1613 return __a | ~__b;
1616 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1617 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1619 return __a | ~__b;
1622 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1623 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1625 return __a | ~__b;
1628 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1629 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1631 return __a | ~__b;
1634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1635 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1637 return __a | ~__b;
1640 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1641 vsub_s8 (int8x8_t __a, int8x8_t __b)
1643 return __a - __b;
1646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1647 vsub_s16 (int16x4_t __a, int16x4_t __b)
1649 return __a - __b;
1652 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1653 vsub_s32 (int32x2_t __a, int32x2_t __b)
1655 return __a - __b;
1658 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1659 vsub_f32 (float32x2_t __a, float32x2_t __b)
1661 return __a - __b;
1664 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1665 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1667 return __a - __b;
1670 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1671 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1673 return __a - __b;
1676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1677 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1679 return __a - __b;
1682 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1683 vsub_s64 (int64x1_t __a, int64x1_t __b)
1685 return __a - __b;
1688 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1689 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1691 return __a - __b;
1694 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1695 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1697 return __a - __b;
1700 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1701 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1703 return __a - __b;
1706 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1707 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1709 return __a - __b;
1712 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1713 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1715 return __a - __b;
1718 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1719 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1721 return __a - __b;
1724 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1725 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1727 return __a - __b;
1730 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1731 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1733 return __a - __b;
1736 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1737 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1739 return __a - __b;
1742 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1743 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1745 return __a - __b;
1748 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1749 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1751 return __a - __b;
1754 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1755 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1757 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1760 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1761 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1763 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1766 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1767 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1769 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1772 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1773 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1775 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1776 (int8x8_t) __b);
1779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1780 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1782 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1783 (int16x4_t) __b);
1786 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1787 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1789 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1790 (int32x2_t) __b);
1793 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1794 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1796 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1799 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1800 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1802 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1805 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1806 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1808 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1812 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1814 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1815 (int8x16_t) __b);
1818 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1819 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1821 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1822 (int16x8_t) __b);
1825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1826 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1828 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1829 (int32x4_t) __b);
1832 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1833 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1835 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
1838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1839 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1841 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
1844 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1845 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1847 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
1850 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1851 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1853 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
1854 (int8x8_t) __b);
1857 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1858 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
1860 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
1861 (int16x4_t) __b);
1864 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1865 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
1867 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
1868 (int32x2_t) __b);
1871 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1872 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
1874 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
1877 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1878 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
1880 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
1883 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1884 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
1886 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
1889 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1890 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
1892 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
1893 (int8x16_t) __b);
1896 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1897 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
1899 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
1900 (int16x8_t) __b);
1903 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1904 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
1906 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
1907 (int32x4_t) __b);
1910 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1911 vqadd_s8 (int8x8_t __a, int8x8_t __b)
1913 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
1916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1917 vqadd_s16 (int16x4_t __a, int16x4_t __b)
1919 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
1922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1923 vqadd_s32 (int32x2_t __a, int32x2_t __b)
1925 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
1928 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1929 vqadd_s64 (int64x1_t __a, int64x1_t __b)
1931 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
1934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1935 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
1937 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
1938 (int8x8_t) __b);
1941 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1942 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
1944 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
1945 (int16x4_t) __b);
1948 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1949 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
1951 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
1952 (int32x2_t) __b);
1955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1956 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
1958 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
1959 (int64x1_t) __b);
1962 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1963 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
1965 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
1968 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1969 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
1971 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
1974 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1975 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
1977 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
1980 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1981 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
1983 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
1986 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1987 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1989 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
1990 (int8x16_t) __b);
1993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1994 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1996 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
1997 (int16x8_t) __b);
2000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2001 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2003 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2004 (int32x4_t) __b);
2007 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2008 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2010 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2011 (int64x2_t) __b);
2014 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2015 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2017 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2020 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2021 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2023 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2026 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2027 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2029 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2032 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2033 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2035 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2038 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2039 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2041 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2042 (int8x8_t) __b);
2045 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2046 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2048 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2049 (int16x4_t) __b);
2052 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2053 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2055 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2056 (int32x2_t) __b);
2059 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2060 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2062 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2063 (int64x1_t) __b);
2066 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2067 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2069 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2072 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2073 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2075 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2078 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2079 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2081 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2084 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2085 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2087 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2091 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2093 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2094 (int8x16_t) __b);
2097 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2098 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2100 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2101 (int16x8_t) __b);
2104 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2105 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2107 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2108 (int32x4_t) __b);
2111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2112 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2114 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2115 (int64x2_t) __b);
2118 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2119 vqneg_s8 (int8x8_t __a)
2121 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2124 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2125 vqneg_s16 (int16x4_t __a)
2127 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2130 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2131 vqneg_s32 (int32x2_t __a)
2133 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2137 vqnegq_s8 (int8x16_t __a)
2139 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2143 vqnegq_s16 (int16x8_t __a)
2145 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2148 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2149 vqnegq_s32 (int32x4_t __a)
2151 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2154 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2155 vqabs_s8 (int8x8_t __a)
2157 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2161 vqabs_s16 (int16x4_t __a)
2163 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2166 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2167 vqabs_s32 (int32x2_t __a)
2169 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2172 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2173 vqabsq_s8 (int8x16_t __a)
2175 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2178 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2179 vqabsq_s16 (int16x8_t __a)
2181 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2184 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2185 vqabsq_s32 (int32x4_t __a)
2187 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2190 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2191 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2193 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2196 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2197 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2199 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2202 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2203 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2205 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2208 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2209 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2211 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2214 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2215 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2217 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2220 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2221 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2223 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2226 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2227 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2229 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2233 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2235 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2238 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2239 vcreate_s8 (uint64_t __a)
2241 return (int8x8_t) __a;
2244 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2245 vcreate_s16 (uint64_t __a)
2247 return (int16x4_t) __a;
2250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2251 vcreate_s32 (uint64_t __a)
2253 return (int32x2_t) __a;
2256 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2257 vcreate_s64 (uint64_t __a)
2259 return (int64x1_t) __a;
2262 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2263 vcreate_f32 (uint64_t __a)
2265 return (float32x2_t) __a;
2268 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2269 vcreate_u8 (uint64_t __a)
2271 return (uint8x8_t) __a;
2274 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2275 vcreate_u16 (uint64_t __a)
2277 return (uint16x4_t) __a;
2280 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2281 vcreate_u32 (uint64_t __a)
2283 return (uint32x2_t) __a;
2286 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2287 vcreate_u64 (uint64_t __a)
2289 return (uint64x1_t) __a;
2292 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2293 vcreate_f64 (uint64_t __a)
2295 return (float64x1_t) __builtin_aarch64_createdf (__a);
2298 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2299 vcreate_p8 (uint64_t __a)
2301 return (poly8x8_t) __a;
2304 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2305 vcreate_p16 (uint64_t __a)
2307 return (poly16x4_t) __a;
2310 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2311 vget_lane_s8 (int8x8_t __a, const int __b)
2313 return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
2316 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2317 vget_lane_s16 (int16x4_t __a, const int __b)
2319 return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
2322 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2323 vget_lane_s32 (int32x2_t __a, const int __b)
2325 return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
2328 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2329 vget_lane_f32 (float32x2_t __a, const int __b)
2331 return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
2334 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2335 vget_lane_u8 (uint8x8_t __a, const int __b)
2337 return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2338 __b);
2341 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2342 vget_lane_u16 (uint16x4_t __a, const int __b)
2344 return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2345 __b);
2348 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2349 vget_lane_u32 (uint32x2_t __a, const int __b)
2351 return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
2352 __b);
2355 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2356 vget_lane_p8 (poly8x8_t __a, const int __b)
2358 return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2359 __b);
2362 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2363 vget_lane_p16 (poly16x4_t __a, const int __b)
2365 return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2366 __b);
2369 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2370 vget_lane_s64 (int64x1_t __a, const int __b)
2372 return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
2375 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2376 vget_lane_u64 (uint64x1_t __a, const int __b)
2378 return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
2381 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2382 vgetq_lane_s8 (int8x16_t __a, const int __b)
2384 return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
2387 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2388 vgetq_lane_s16 (int16x8_t __a, const int __b)
2390 return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
2393 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2394 vgetq_lane_s32 (int32x4_t __a, const int __b)
2396 return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
2399 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2400 vgetq_lane_f32 (float32x4_t __a, const int __b)
2402 return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
2405 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2406 vgetq_lane_f64 (float64x2_t __a, const int __b)
2408 return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
2411 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2412 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2414 return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2415 __b);
2418 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2419 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2421 return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2422 __b);
2425 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2426 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2428 return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
2429 __b);
2432 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2433 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2435 return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2436 __b);
2439 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2440 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2442 return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2443 __b);
2446 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2447 vgetq_lane_s64 (int64x2_t __a, const int __b)
2449 return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
2452 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2453 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2455 return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
2456 __b);
2459 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2460 vreinterpret_p8_s8 (int8x8_t __a)
2462 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2465 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2466 vreinterpret_p8_s16 (int16x4_t __a)
2468 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2471 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2472 vreinterpret_p8_s32 (int32x2_t __a)
2474 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2477 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2478 vreinterpret_p8_s64 (int64x1_t __a)
2480 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2483 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2484 vreinterpret_p8_f32 (float32x2_t __a)
2486 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2489 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2490 vreinterpret_p8_u8 (uint8x8_t __a)
2492 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2495 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2496 vreinterpret_p8_u16 (uint16x4_t __a)
2498 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2501 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2502 vreinterpret_p8_u32 (uint32x2_t __a)
2504 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2507 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2508 vreinterpret_p8_u64 (uint64x1_t __a)
2510 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2513 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2514 vreinterpret_p8_p16 (poly16x4_t __a)
2516 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2519 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2520 vreinterpretq_p8_s8 (int8x16_t __a)
2522 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2525 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2526 vreinterpretq_p8_s16 (int16x8_t __a)
2528 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2531 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2532 vreinterpretq_p8_s32 (int32x4_t __a)
2534 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2537 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2538 vreinterpretq_p8_s64 (int64x2_t __a)
2540 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2543 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2544 vreinterpretq_p8_f32 (float32x4_t __a)
2546 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2549 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2550 vreinterpretq_p8_u8 (uint8x16_t __a)
2552 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2553 __a);
2556 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2557 vreinterpretq_p8_u16 (uint16x8_t __a)
2559 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2560 __a);
2563 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2564 vreinterpretq_p8_u32 (uint32x4_t __a)
2566 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2567 __a);
2570 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2571 vreinterpretq_p8_u64 (uint64x2_t __a)
2573 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2574 __a);
2577 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2578 vreinterpretq_p8_p16 (poly16x8_t __a)
2580 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2581 __a);
2584 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2585 vreinterpret_p16_s8 (int8x8_t __a)
2587 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2590 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2591 vreinterpret_p16_s16 (int16x4_t __a)
2593 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2596 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2597 vreinterpret_p16_s32 (int32x2_t __a)
2599 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2602 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2603 vreinterpret_p16_s64 (int64x1_t __a)
2605 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2608 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2609 vreinterpret_p16_f32 (float32x2_t __a)
2611 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2614 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2615 vreinterpret_p16_u8 (uint8x8_t __a)
2617 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2620 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2621 vreinterpret_p16_u16 (uint16x4_t __a)
2623 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2626 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2627 vreinterpret_p16_u32 (uint32x2_t __a)
2629 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2632 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2633 vreinterpret_p16_u64 (uint64x1_t __a)
2635 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2638 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2639 vreinterpret_p16_p8 (poly8x8_t __a)
2641 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2644 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2645 vreinterpretq_p16_s8 (int8x16_t __a)
2647 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2650 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2651 vreinterpretq_p16_s16 (int16x8_t __a)
2653 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2656 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2657 vreinterpretq_p16_s32 (int32x4_t __a)
2659 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2662 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2663 vreinterpretq_p16_s64 (int64x2_t __a)
2665 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2668 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2669 vreinterpretq_p16_f32 (float32x4_t __a)
2671 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2674 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2675 vreinterpretq_p16_u8 (uint8x16_t __a)
2677 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2678 __a);
2681 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2682 vreinterpretq_p16_u16 (uint16x8_t __a)
2684 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2687 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2688 vreinterpretq_p16_u32 (uint32x4_t __a)
2690 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2693 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2694 vreinterpretq_p16_u64 (uint64x2_t __a)
2696 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2699 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2700 vreinterpretq_p16_p8 (poly8x16_t __a)
2702 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2703 __a);
2706 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2707 vreinterpret_f32_s8 (int8x8_t __a)
2709 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2712 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2713 vreinterpret_f32_s16 (int16x4_t __a)
2715 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2718 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2719 vreinterpret_f32_s32 (int32x2_t __a)
2721 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2724 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2725 vreinterpret_f32_s64 (int64x1_t __a)
2727 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2730 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2731 vreinterpret_f32_u8 (uint8x8_t __a)
2733 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2736 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2737 vreinterpret_f32_u16 (uint16x4_t __a)
2739 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2740 __a);
2743 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2744 vreinterpret_f32_u32 (uint32x2_t __a)
2746 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2747 __a);
2750 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2751 vreinterpret_f32_u64 (uint64x1_t __a)
2753 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2756 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2757 vreinterpret_f32_p8 (poly8x8_t __a)
2759 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2762 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2763 vreinterpret_f32_p16 (poly16x4_t __a)
2765 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2766 __a);
2769 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2770 vreinterpretq_f32_s8 (int8x16_t __a)
2772 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2775 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2776 vreinterpretq_f32_s16 (int16x8_t __a)
2778 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2781 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2782 vreinterpretq_f32_s32 (int32x4_t __a)
2784 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2787 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2788 vreinterpretq_f32_s64 (int64x2_t __a)
2790 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2793 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2794 vreinterpretq_f32_u8 (uint8x16_t __a)
2796 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2797 __a);
2800 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2801 vreinterpretq_f32_u16 (uint16x8_t __a)
2803 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2804 __a);
2807 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2808 vreinterpretq_f32_u32 (uint32x4_t __a)
2810 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2811 __a);
2814 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2815 vreinterpretq_f32_u64 (uint64x2_t __a)
2817 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
2818 __a);
2821 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2822 vreinterpretq_f32_p8 (poly8x16_t __a)
2824 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2825 __a);
2828 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2829 vreinterpretq_f32_p16 (poly16x8_t __a)
2831 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2832 __a);
2835 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2836 vreinterpret_s64_s8 (int8x8_t __a)
2838 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2841 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2842 vreinterpret_s64_s16 (int16x4_t __a)
2844 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2847 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2848 vreinterpret_s64_s32 (int32x2_t __a)
2850 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2853 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2854 vreinterpret_s64_f32 (float32x2_t __a)
2856 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2859 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2860 vreinterpret_s64_u8 (uint8x8_t __a)
2862 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2865 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2866 vreinterpret_s64_u16 (uint16x4_t __a)
2868 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2871 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2872 vreinterpret_s64_u32 (uint32x2_t __a)
2874 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
2877 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2878 vreinterpret_s64_u64 (uint64x1_t __a)
2880 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
2883 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2884 vreinterpret_s64_p8 (poly8x8_t __a)
2886 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2889 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2890 vreinterpret_s64_p16 (poly16x4_t __a)
2892 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2895 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2896 vreinterpretq_s64_s8 (int8x16_t __a)
2898 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
2901 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2902 vreinterpretq_s64_s16 (int16x8_t __a)
2904 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
2907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2908 vreinterpretq_s64_s32 (int32x4_t __a)
2910 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
2913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2914 vreinterpretq_s64_f32 (float32x4_t __a)
2916 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
2919 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2920 vreinterpretq_s64_u8 (uint8x16_t __a)
2922 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2925 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2926 vreinterpretq_s64_u16 (uint16x8_t __a)
2928 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2931 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2932 vreinterpretq_s64_u32 (uint32x4_t __a)
2934 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
2937 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2938 vreinterpretq_s64_u64 (uint64x2_t __a)
2940 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
2943 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2944 vreinterpretq_s64_p8 (poly8x16_t __a)
2946 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2949 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2950 vreinterpretq_s64_p16 (poly16x8_t __a)
2952 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2956 vreinterpret_u64_s8 (int8x8_t __a)
2958 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2961 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2962 vreinterpret_u64_s16 (int16x4_t __a)
2964 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2967 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2968 vreinterpret_u64_s32 (int32x2_t __a)
2970 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2973 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2974 vreinterpret_u64_s64 (int64x1_t __a)
2976 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
2979 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2980 vreinterpret_u64_f32 (float32x2_t __a)
2982 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2985 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2986 vreinterpret_u64_u8 (uint8x8_t __a)
2988 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2991 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2992 vreinterpret_u64_u16 (uint16x4_t __a)
2994 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2998 vreinterpret_u64_u32 (uint32x2_t __a)
3000 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3004 vreinterpret_u64_p8 (poly8x8_t __a)
3006 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3010 vreinterpret_u64_p16 (poly16x4_t __a)
3012 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3015 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3016 vreinterpretq_u64_s8 (int8x16_t __a)
3018 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3021 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3022 vreinterpretq_u64_s16 (int16x8_t __a)
3024 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3027 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3028 vreinterpretq_u64_s32 (int32x4_t __a)
3030 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3033 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3034 vreinterpretq_u64_s64 (int64x2_t __a)
3036 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3039 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3040 vreinterpretq_u64_f32 (float32x4_t __a)
3042 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3046 vreinterpretq_u64_u8 (uint8x16_t __a)
3048 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3049 __a);
3052 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3053 vreinterpretq_u64_u16 (uint16x8_t __a)
3055 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3058 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3059 vreinterpretq_u64_u32 (uint32x4_t __a)
3061 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3064 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3065 vreinterpretq_u64_p8 (poly8x16_t __a)
3067 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3068 __a);
3071 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3072 vreinterpretq_u64_p16 (poly16x8_t __a)
3074 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3077 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3078 vreinterpret_s8_s16 (int16x4_t __a)
3080 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3083 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3084 vreinterpret_s8_s32 (int32x2_t __a)
3086 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3089 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3090 vreinterpret_s8_s64 (int64x1_t __a)
3092 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3096 vreinterpret_s8_f32 (float32x2_t __a)
3098 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3101 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3102 vreinterpret_s8_u8 (uint8x8_t __a)
3104 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3107 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3108 vreinterpret_s8_u16 (uint16x4_t __a)
3110 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3113 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3114 vreinterpret_s8_u32 (uint32x2_t __a)
3116 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3119 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3120 vreinterpret_s8_u64 (uint64x1_t __a)
3122 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3125 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3126 vreinterpret_s8_p8 (poly8x8_t __a)
3128 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3131 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3132 vreinterpret_s8_p16 (poly16x4_t __a)
3134 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3137 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3138 vreinterpretq_s8_s16 (int16x8_t __a)
3140 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3143 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3144 vreinterpretq_s8_s32 (int32x4_t __a)
3146 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3150 vreinterpretq_s8_s64 (int64x2_t __a)
3152 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3155 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3156 vreinterpretq_s8_f32 (float32x4_t __a)
3158 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3161 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3162 vreinterpretq_s8_u8 (uint8x16_t __a)
3164 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3165 __a);
3168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3169 vreinterpretq_s8_u16 (uint16x8_t __a)
3171 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3174 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3175 vreinterpretq_s8_u32 (uint32x4_t __a)
3177 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3180 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3181 vreinterpretq_s8_u64 (uint64x2_t __a)
3183 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3186 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3187 vreinterpretq_s8_p8 (poly8x16_t __a)
3189 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3190 __a);
3193 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3194 vreinterpretq_s8_p16 (poly16x8_t __a)
3196 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3199 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3200 vreinterpret_s16_s8 (int8x8_t __a)
3202 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3206 vreinterpret_s16_s32 (int32x2_t __a)
3208 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3211 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3212 vreinterpret_s16_s64 (int64x1_t __a)
3214 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3217 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3218 vreinterpret_s16_f32 (float32x2_t __a)
3220 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3223 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3224 vreinterpret_s16_u8 (uint8x8_t __a)
3226 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3229 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3230 vreinterpret_s16_u16 (uint16x4_t __a)
3232 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3235 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3236 vreinterpret_s16_u32 (uint32x2_t __a)
3238 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3242 vreinterpret_s16_u64 (uint64x1_t __a)
3244 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3247 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3248 vreinterpret_s16_p8 (poly8x8_t __a)
3250 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3253 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3254 vreinterpret_s16_p16 (poly16x4_t __a)
3256 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3259 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3260 vreinterpretq_s16_s8 (int8x16_t __a)
3262 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3265 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3266 vreinterpretq_s16_s32 (int32x4_t __a)
3268 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3271 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3272 vreinterpretq_s16_s64 (int64x2_t __a)
3274 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3277 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3278 vreinterpretq_s16_f32 (float32x4_t __a)
3280 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3284 vreinterpretq_s16_u8 (uint8x16_t __a)
3286 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3289 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3290 vreinterpretq_s16_u16 (uint16x8_t __a)
3292 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3295 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3296 vreinterpretq_s16_u32 (uint32x4_t __a)
3298 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3301 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3302 vreinterpretq_s16_u64 (uint64x2_t __a)
3304 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3307 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3308 vreinterpretq_s16_p8 (poly8x16_t __a)
3310 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3314 vreinterpretq_s16_p16 (poly16x8_t __a)
3316 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3319 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3320 vreinterpret_s32_s8 (int8x8_t __a)
3322 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3325 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3326 vreinterpret_s32_s16 (int16x4_t __a)
3328 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3331 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3332 vreinterpret_s32_s64 (int64x1_t __a)
3334 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3337 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3338 vreinterpret_s32_f32 (float32x2_t __a)
3340 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3343 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3344 vreinterpret_s32_u8 (uint8x8_t __a)
3346 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3349 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3350 vreinterpret_s32_u16 (uint16x4_t __a)
3352 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3355 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3356 vreinterpret_s32_u32 (uint32x2_t __a)
3358 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3361 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3362 vreinterpret_s32_u64 (uint64x1_t __a)
3364 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3367 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3368 vreinterpret_s32_p8 (poly8x8_t __a)
3370 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3373 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3374 vreinterpret_s32_p16 (poly16x4_t __a)
3376 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3379 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3380 vreinterpretq_s32_s8 (int8x16_t __a)
3382 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3385 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3386 vreinterpretq_s32_s16 (int16x8_t __a)
3388 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3391 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3392 vreinterpretq_s32_s64 (int64x2_t __a)
3394 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3397 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3398 vreinterpretq_s32_f32 (float32x4_t __a)
3400 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3403 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3404 vreinterpretq_s32_u8 (uint8x16_t __a)
3406 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3409 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3410 vreinterpretq_s32_u16 (uint16x8_t __a)
3412 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3415 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3416 vreinterpretq_s32_u32 (uint32x4_t __a)
3418 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3422 vreinterpretq_s32_u64 (uint64x2_t __a)
3424 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3427 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3428 vreinterpretq_s32_p8 (poly8x16_t __a)
3430 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3433 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3434 vreinterpretq_s32_p16 (poly16x8_t __a)
3436 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3440 vreinterpret_u8_s8 (int8x8_t __a)
3442 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3445 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3446 vreinterpret_u8_s16 (int16x4_t __a)
3448 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3451 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3452 vreinterpret_u8_s32 (int32x2_t __a)
3454 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3458 vreinterpret_u8_s64 (int64x1_t __a)
3460 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3463 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3464 vreinterpret_u8_f32 (float32x2_t __a)
3466 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3469 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3470 vreinterpret_u8_u16 (uint16x4_t __a)
3472 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3475 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3476 vreinterpret_u8_u32 (uint32x2_t __a)
3478 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3481 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3482 vreinterpret_u8_u64 (uint64x1_t __a)
3484 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3488 vreinterpret_u8_p8 (poly8x8_t __a)
3490 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3493 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3494 vreinterpret_u8_p16 (poly16x4_t __a)
3496 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3499 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3500 vreinterpretq_u8_s8 (int8x16_t __a)
3502 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3505 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3506 vreinterpretq_u8_s16 (int16x8_t __a)
3508 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3511 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3512 vreinterpretq_u8_s32 (int32x4_t __a)
3514 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3517 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3518 vreinterpretq_u8_s64 (int64x2_t __a)
3520 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3523 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3524 vreinterpretq_u8_f32 (float32x4_t __a)
3526 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3529 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3530 vreinterpretq_u8_u16 (uint16x8_t __a)
3532 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3533 __a);
3536 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3537 vreinterpretq_u8_u32 (uint32x4_t __a)
3539 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3540 __a);
3543 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3544 vreinterpretq_u8_u64 (uint64x2_t __a)
3546 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3547 __a);
3550 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3551 vreinterpretq_u8_p8 (poly8x16_t __a)
3553 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3554 __a);
3557 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3558 vreinterpretq_u8_p16 (poly16x8_t __a)
3560 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3561 __a);
3564 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3565 vreinterpret_u16_s8 (int8x8_t __a)
3567 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3570 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3571 vreinterpret_u16_s16 (int16x4_t __a)
3573 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3576 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3577 vreinterpret_u16_s32 (int32x2_t __a)
3579 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3583 vreinterpret_u16_s64 (int64x1_t __a)
3585 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3588 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3589 vreinterpret_u16_f32 (float32x2_t __a)
3591 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3594 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3595 vreinterpret_u16_u8 (uint8x8_t __a)
3597 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3600 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3601 vreinterpret_u16_u32 (uint32x2_t __a)
3603 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3606 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3607 vreinterpret_u16_u64 (uint64x1_t __a)
3609 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3613 vreinterpret_u16_p8 (poly8x8_t __a)
3615 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3618 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3619 vreinterpret_u16_p16 (poly16x4_t __a)
3621 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3624 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3625 vreinterpretq_u16_s8 (int8x16_t __a)
3627 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3631 vreinterpretq_u16_s16 (int16x8_t __a)
3633 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3637 vreinterpretq_u16_s32 (int32x4_t __a)
3639 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3642 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3643 vreinterpretq_u16_s64 (int64x2_t __a)
3645 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3649 vreinterpretq_u16_f32 (float32x4_t __a)
3651 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3654 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3655 vreinterpretq_u16_u8 (uint8x16_t __a)
3657 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3658 __a);
3661 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3662 vreinterpretq_u16_u32 (uint32x4_t __a)
3664 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3668 vreinterpretq_u16_u64 (uint64x2_t __a)
3670 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3673 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3674 vreinterpretq_u16_p8 (poly8x16_t __a)
3676 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3677 __a);
3680 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3681 vreinterpretq_u16_p16 (poly16x8_t __a)
3683 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3687 vreinterpret_u32_s8 (int8x8_t __a)
3689 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3693 vreinterpret_u32_s16 (int16x4_t __a)
3695 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3698 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3699 vreinterpret_u32_s32 (int32x2_t __a)
3701 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3704 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3705 vreinterpret_u32_s64 (int64x1_t __a)
3707 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3711 vreinterpret_u32_f32 (float32x2_t __a)
3713 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3716 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3717 vreinterpret_u32_u8 (uint8x8_t __a)
3719 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3722 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3723 vreinterpret_u32_u16 (uint16x4_t __a)
3725 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3728 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3729 vreinterpret_u32_u64 (uint64x1_t __a)
3731 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3735 vreinterpret_u32_p8 (poly8x8_t __a)
3737 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3741 vreinterpret_u32_p16 (poly16x4_t __a)
3743 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3747 vreinterpretq_u32_s8 (int8x16_t __a)
3749 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3753 vreinterpretq_u32_s16 (int16x8_t __a)
3755 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3759 vreinterpretq_u32_s32 (int32x4_t __a)
3761 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3764 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3765 vreinterpretq_u32_s64 (int64x2_t __a)
3767 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3770 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3771 vreinterpretq_u32_f32 (float32x4_t __a)
3773 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3776 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3777 vreinterpretq_u32_u8 (uint8x16_t __a)
3779 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3780 __a);
3783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3784 vreinterpretq_u32_u16 (uint16x8_t __a)
3786 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3789 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3790 vreinterpretq_u32_u64 (uint64x2_t __a)
3792 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3795 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3796 vreinterpretq_u32_p8 (poly8x16_t __a)
3798 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3799 __a);
3802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3803 vreinterpretq_u32_p16 (poly16x8_t __a)
3805 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3809 vcombine_s8 (int8x8_t __a, int8x8_t __b)
3811 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
3814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3815 vcombine_s16 (int16x4_t __a, int16x4_t __b)
3817 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3821 vcombine_s32 (int32x2_t __a, int32x2_t __b)
3823 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
3826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3827 vcombine_s64 (int64x1_t __a, int64x1_t __b)
3829 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
3832 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3833 vcombine_f32 (float32x2_t __a, float32x2_t __b)
3835 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
3838 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3839 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
3841 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3842 (int8x8_t) __b);
3845 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3846 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
3848 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3849 (int16x4_t) __b);
3852 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3853 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
3855 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
3856 (int32x2_t) __b);
3859 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3860 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
3862 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
3863 (int64x1_t) __b);
3866 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
3867 vcombine_f64 (float64x1_t __a, float64x1_t __b)
3869 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
3872 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3873 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
3875 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3876 (int8x8_t) __b);
3879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3880 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
3882 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3883 (int16x4_t) __b);
/* Start of temporary inline asm implementations.  */
3888 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3889 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
3891 int8x8_t result;
3892 __asm__ ("saba %0.8b,%2.8b,%3.8b"
3893 : "=w"(result)
3894 : "0"(a), "w"(b), "w"(c)
3895 : /* No clobbers */);
3896 return result;
3899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3900 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
3902 int16x4_t result;
3903 __asm__ ("saba %0.4h,%2.4h,%3.4h"
3904 : "=w"(result)
3905 : "0"(a), "w"(b), "w"(c)
3906 : /* No clobbers */);
3907 return result;
3910 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3911 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
3913 int32x2_t result;
3914 __asm__ ("saba %0.2s,%2.2s,%3.2s"
3915 : "=w"(result)
3916 : "0"(a), "w"(b), "w"(c)
3917 : /* No clobbers */);
3918 return result;
3921 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3922 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
3924 uint8x8_t result;
3925 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3926 : "=w"(result)
3927 : "0"(a), "w"(b), "w"(c)
3928 : /* No clobbers */);
3929 return result;
3932 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3933 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
3935 uint16x4_t result;
3936 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3937 : "=w"(result)
3938 : "0"(a), "w"(b), "w"(c)
3939 : /* No clobbers */);
3940 return result;
3943 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3944 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
3946 uint32x2_t result;
3947 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
3948 : "=w"(result)
3949 : "0"(a), "w"(b), "w"(c)
3950 : /* No clobbers */);
3951 return result;
3954 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3955 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
3957 int16x8_t result;
3958 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
3959 : "=w"(result)
3960 : "0"(a), "w"(b), "w"(c)
3961 : /* No clobbers */);
3962 return result;
3965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3966 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
3968 int32x4_t result;
3969 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
3970 : "=w"(result)
3971 : "0"(a), "w"(b), "w"(c)
3972 : /* No clobbers */);
3973 return result;
3976 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3977 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
3979 int64x2_t result;
3980 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
3981 : "=w"(result)
3982 : "0"(a), "w"(b), "w"(c)
3983 : /* No clobbers */);
3984 return result;
3987 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3988 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
3990 uint16x8_t result;
3991 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
3992 : "=w"(result)
3993 : "0"(a), "w"(b), "w"(c)
3994 : /* No clobbers */);
3995 return result;
3998 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3999 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4001 uint32x4_t result;
4002 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4003 : "=w"(result)
4004 : "0"(a), "w"(b), "w"(c)
4005 : /* No clobbers */);
4006 return result;
4009 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4010 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4012 uint64x2_t result;
4013 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4014 : "=w"(result)
4015 : "0"(a), "w"(b), "w"(c)
4016 : /* No clobbers */);
4017 return result;
4020 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4021 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4023 int16x8_t result;
4024 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4025 : "=w"(result)
4026 : "0"(a), "w"(b), "w"(c)
4027 : /* No clobbers */);
4028 return result;
4031 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4032 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4034 int32x4_t result;
4035 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4036 : "=w"(result)
4037 : "0"(a), "w"(b), "w"(c)
4038 : /* No clobbers */);
4039 return result;
4042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4043 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4045 int64x2_t result;
4046 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4047 : "=w"(result)
4048 : "0"(a), "w"(b), "w"(c)
4049 : /* No clobbers */);
4050 return result;
4053 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4054 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4056 uint16x8_t result;
4057 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4058 : "=w"(result)
4059 : "0"(a), "w"(b), "w"(c)
4060 : /* No clobbers */);
4061 return result;
4064 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4065 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4067 uint32x4_t result;
4068 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4069 : "=w"(result)
4070 : "0"(a), "w"(b), "w"(c)
4071 : /* No clobbers */);
4072 return result;
4075 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4076 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4078 uint64x2_t result;
4079 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4080 : "=w"(result)
4081 : "0"(a), "w"(b), "w"(c)
4082 : /* No clobbers */);
4083 return result;
4086 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4087 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4089 int8x16_t result;
4090 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4091 : "=w"(result)
4092 : "0"(a), "w"(b), "w"(c)
4093 : /* No clobbers */);
4094 return result;
4097 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4098 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4100 int16x8_t result;
4101 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4102 : "=w"(result)
4103 : "0"(a), "w"(b), "w"(c)
4104 : /* No clobbers */);
4105 return result;
4108 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4109 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4111 int32x4_t result;
4112 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4113 : "=w"(result)
4114 : "0"(a), "w"(b), "w"(c)
4115 : /* No clobbers */);
4116 return result;
4119 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4120 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4122 uint8x16_t result;
4123 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4124 : "=w"(result)
4125 : "0"(a), "w"(b), "w"(c)
4126 : /* No clobbers */);
4127 return result;
4130 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4131 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4133 uint16x8_t result;
4134 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4135 : "=w"(result)
4136 : "0"(a), "w"(b), "w"(c)
4137 : /* No clobbers */);
4138 return result;
4141 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4142 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4144 uint32x4_t result;
4145 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4146 : "=w"(result)
4147 : "0"(a), "w"(b), "w"(c)
4148 : /* No clobbers */);
4149 return result;
4152 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4153 vabd_f32 (float32x2_t a, float32x2_t b)
4155 float32x2_t result;
4156 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4157 : "=w"(result)
4158 : "w"(a), "w"(b)
4159 : /* No clobbers */);
4160 return result;
4163 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4164 vabd_s8 (int8x8_t a, int8x8_t b)
4166 int8x8_t result;
4167 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4168 : "=w"(result)
4169 : "w"(a), "w"(b)
4170 : /* No clobbers */);
4171 return result;
4174 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4175 vabd_s16 (int16x4_t a, int16x4_t b)
4177 int16x4_t result;
4178 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4179 : "=w"(result)
4180 : "w"(a), "w"(b)
4181 : /* No clobbers */);
4182 return result;
4185 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4186 vabd_s32 (int32x2_t a, int32x2_t b)
4188 int32x2_t result;
4189 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4190 : "=w"(result)
4191 : "w"(a), "w"(b)
4192 : /* No clobbers */);
4193 return result;
4196 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4197 vabd_u8 (uint8x8_t a, uint8x8_t b)
4199 uint8x8_t result;
4200 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4201 : "=w"(result)
4202 : "w"(a), "w"(b)
4203 : /* No clobbers */);
4204 return result;
4207 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4208 vabd_u16 (uint16x4_t a, uint16x4_t b)
4210 uint16x4_t result;
4211 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4212 : "=w"(result)
4213 : "w"(a), "w"(b)
4214 : /* No clobbers */);
4215 return result;
4218 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4219 vabd_u32 (uint32x2_t a, uint32x2_t b)
4221 uint32x2_t result;
4222 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4223 : "=w"(result)
4224 : "w"(a), "w"(b)
4225 : /* No clobbers */);
4226 return result;
4229 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4230 vabdd_f64 (float64_t a, float64_t b)
4232 float64_t result;
4233 __asm__ ("fabd %d0, %d1, %d2"
4234 : "=w"(result)
4235 : "w"(a), "w"(b)
4236 : /* No clobbers */);
4237 return result;
4240 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4241 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4243 int16x8_t result;
4244 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4245 : "=w"(result)
4246 : "w"(a), "w"(b)
4247 : /* No clobbers */);
4248 return result;
4251 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4252 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4254 int32x4_t result;
4255 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4256 : "=w"(result)
4257 : "w"(a), "w"(b)
4258 : /* No clobbers */);
4259 return result;
4262 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4263 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4265 int64x2_t result;
4266 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4267 : "=w"(result)
4268 : "w"(a), "w"(b)
4269 : /* No clobbers */);
4270 return result;
4273 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4274 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4276 uint16x8_t result;
4277 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4278 : "=w"(result)
4279 : "w"(a), "w"(b)
4280 : /* No clobbers */);
4281 return result;
4284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4285 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4287 uint32x4_t result;
4288 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4289 : "=w"(result)
4290 : "w"(a), "w"(b)
4291 : /* No clobbers */);
4292 return result;
4295 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4296 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4298 uint64x2_t result;
4299 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4300 : "=w"(result)
4301 : "w"(a), "w"(b)
4302 : /* No clobbers */);
4303 return result;
4306 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4307 vabdl_s8 (int8x8_t a, int8x8_t b)
4309 int16x8_t result;
4310 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4311 : "=w"(result)
4312 : "w"(a), "w"(b)
4313 : /* No clobbers */);
4314 return result;
4317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4318 vabdl_s16 (int16x4_t a, int16x4_t b)
4320 int32x4_t result;
4321 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4322 : "=w"(result)
4323 : "w"(a), "w"(b)
4324 : /* No clobbers */);
4325 return result;
4328 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4329 vabdl_s32 (int32x2_t a, int32x2_t b)
4331 int64x2_t result;
4332 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4333 : "=w"(result)
4334 : "w"(a), "w"(b)
4335 : /* No clobbers */);
4336 return result;
4339 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4340 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4342 uint16x8_t result;
4343 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4344 : "=w"(result)
4345 : "w"(a), "w"(b)
4346 : /* No clobbers */);
4347 return result;
4350 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4351 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4353 uint32x4_t result;
4354 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4355 : "=w"(result)
4356 : "w"(a), "w"(b)
4357 : /* No clobbers */);
4358 return result;
4361 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4362 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4364 uint64x2_t result;
4365 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4366 : "=w"(result)
4367 : "w"(a), "w"(b)
4368 : /* No clobbers */);
4369 return result;
4372 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4373 vabdq_f32 (float32x4_t a, float32x4_t b)
4375 float32x4_t result;
4376 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4377 : "=w"(result)
4378 : "w"(a), "w"(b)
4379 : /* No clobbers */);
4380 return result;
4383 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4384 vabdq_f64 (float64x2_t a, float64x2_t b)
4386 float64x2_t result;
4387 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4388 : "=w"(result)
4389 : "w"(a), "w"(b)
4390 : /* No clobbers */);
4391 return result;
4394 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4395 vabdq_s8 (int8x16_t a, int8x16_t b)
4397 int8x16_t result;
4398 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4399 : "=w"(result)
4400 : "w"(a), "w"(b)
4401 : /* No clobbers */);
4402 return result;
4405 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4406 vabdq_s16 (int16x8_t a, int16x8_t b)
4408 int16x8_t result;
4409 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4410 : "=w"(result)
4411 : "w"(a), "w"(b)
4412 : /* No clobbers */);
4413 return result;
4416 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4417 vabdq_s32 (int32x4_t a, int32x4_t b)
4419 int32x4_t result;
4420 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4421 : "=w"(result)
4422 : "w"(a), "w"(b)
4423 : /* No clobbers */);
4424 return result;
4427 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4428 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4430 uint8x16_t result;
4431 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4432 : "=w"(result)
4433 : "w"(a), "w"(b)
4434 : /* No clobbers */);
4435 return result;
4438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4439 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4441 uint16x8_t result;
4442 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4443 : "=w"(result)
4444 : "w"(a), "w"(b)
4445 : /* No clobbers */);
4446 return result;
4449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4450 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4452 uint32x4_t result;
4453 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4454 : "=w"(result)
4455 : "w"(a), "w"(b)
4456 : /* No clobbers */);
4457 return result;
4460 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4461 vabds_f32 (float32_t a, float32_t b)
4463 float32_t result;
4464 __asm__ ("fabd %s0, %s1, %s2"
4465 : "=w"(result)
4466 : "w"(a), "w"(b)
4467 : /* No clobbers */);
4468 return result;
4471 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4472 vaddlv_s8 (int8x8_t a)
4474 int16_t result;
4475 __asm__ ("saddlv %h0,%1.8b"
4476 : "=w"(result)
4477 : "w"(a)
4478 : /* No clobbers */);
4479 return result;
4482 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4483 vaddlv_s16 (int16x4_t a)
4485 int32_t result;
4486 __asm__ ("saddlv %s0,%1.4h"
4487 : "=w"(result)
4488 : "w"(a)
4489 : /* No clobbers */);
4490 return result;
4493 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4494 vaddlv_u8 (uint8x8_t a)
4496 uint16_t result;
4497 __asm__ ("uaddlv %h0,%1.8b"
4498 : "=w"(result)
4499 : "w"(a)
4500 : /* No clobbers */);
4501 return result;
4504 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4505 vaddlv_u16 (uint16x4_t a)
4507 uint32_t result;
4508 __asm__ ("uaddlv %s0,%1.4h"
4509 : "=w"(result)
4510 : "w"(a)
4511 : /* No clobbers */);
4512 return result;
4515 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4516 vaddlvq_s8 (int8x16_t a)
4518 int16_t result;
4519 __asm__ ("saddlv %h0,%1.16b"
4520 : "=w"(result)
4521 : "w"(a)
4522 : /* No clobbers */);
4523 return result;
4526 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4527 vaddlvq_s16 (int16x8_t a)
4529 int32_t result;
4530 __asm__ ("saddlv %s0,%1.8h"
4531 : "=w"(result)
4532 : "w"(a)
4533 : /* No clobbers */);
4534 return result;
4537 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4538 vaddlvq_s32 (int32x4_t a)
4540 int64_t result;
4541 __asm__ ("saddlv %d0,%1.4s"
4542 : "=w"(result)
4543 : "w"(a)
4544 : /* No clobbers */);
4545 return result;
4548 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4549 vaddlvq_u8 (uint8x16_t a)
4551 uint16_t result;
4552 __asm__ ("uaddlv %h0,%1.16b"
4553 : "=w"(result)
4554 : "w"(a)
4555 : /* No clobbers */);
4556 return result;
4559 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4560 vaddlvq_u16 (uint16x8_t a)
4562 uint32_t result;
4563 __asm__ ("uaddlv %s0,%1.8h"
4564 : "=w"(result)
4565 : "w"(a)
4566 : /* No clobbers */);
4567 return result;
4570 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4571 vaddlvq_u32 (uint32x4_t a)
4573 uint64_t result;
4574 __asm__ ("uaddlv %d0,%1.4s"
4575 : "=w"(result)
4576 : "w"(a)
4577 : /* No clobbers */);
4578 return result;
4581 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4582 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
4584 float32x2_t result;
4585 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4586 : "=w"(result)
4587 : "0"(a), "w"(b), "w"(c)
4588 : /* No clobbers */);
4589 return result;
4592 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4593 vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
4595 poly8x8_t result;
4596 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4597 : "=w"(result)
4598 : "0"(a), "w"(b), "w"(c)
4599 : /* No clobbers */);
4600 return result;
4603 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4604 vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
4606 poly16x4_t result;
4607 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4608 : "=w"(result)
4609 : "0"(a), "w"(b), "w"(c)
4610 : /* No clobbers */);
4611 return result;
4614 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4615 vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
4617 int8x8_t result;
4618 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4619 : "=w"(result)
4620 : "0"(a), "w"(b), "w"(c)
4621 : /* No clobbers */);
4622 return result;
4625 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4626 vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
4628 int16x4_t result;
4629 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4630 : "=w"(result)
4631 : "0"(a), "w"(b), "w"(c)
4632 : /* No clobbers */);
4633 return result;
4636 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4637 vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
4639 int32x2_t result;
4640 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4641 : "=w"(result)
4642 : "0"(a), "w"(b), "w"(c)
4643 : /* No clobbers */);
4644 return result;
4647 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4648 vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
4650 int64x1_t result;
4651 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4652 : "=w"(result)
4653 : "0"(a), "w"(b), "w"(c)
4654 : /* No clobbers */);
4655 return result;
4658 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4659 vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4661 uint8x8_t result;
4662 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4663 : "=w"(result)
4664 : "0"(a), "w"(b), "w"(c)
4665 : /* No clobbers */);
4666 return result;
4669 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4670 vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4672 uint16x4_t result;
4673 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4674 : "=w"(result)
4675 : "0"(a), "w"(b), "w"(c)
4676 : /* No clobbers */);
4677 return result;
4680 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4681 vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4683 uint32x2_t result;
4684 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4685 : "=w"(result)
4686 : "0"(a), "w"(b), "w"(c)
4687 : /* No clobbers */);
4688 return result;
4691 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4692 vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
4694 uint64x1_t result;
4695 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4696 : "=w"(result)
4697 : "0"(a), "w"(b), "w"(c)
4698 : /* No clobbers */);
4699 return result;
4702 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4703 vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
4705 float32x4_t result;
4706 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4707 : "=w"(result)
4708 : "0"(a), "w"(b), "w"(c)
4709 : /* No clobbers */);
4710 return result;
4713 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4714 vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
4716 float64x2_t result;
4717 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4718 : "=w"(result)
4719 : "0"(a), "w"(b), "w"(c)
4720 : /* No clobbers */);
4721 return result;
4724 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4725 vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
4727 poly8x16_t result;
4728 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4729 : "=w"(result)
4730 : "0"(a), "w"(b), "w"(c)
4731 : /* No clobbers */);
4732 return result;
4735 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4736 vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
4738 poly16x8_t result;
4739 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4740 : "=w"(result)
4741 : "0"(a), "w"(b), "w"(c)
4742 : /* No clobbers */);
4743 return result;
4746 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4747 vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
4749 int8x16_t result;
4750 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4751 : "=w"(result)
4752 : "0"(a), "w"(b), "w"(c)
4753 : /* No clobbers */);
4754 return result;
4757 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4758 vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
4760 int16x8_t result;
4761 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4762 : "=w"(result)
4763 : "0"(a), "w"(b), "w"(c)
4764 : /* No clobbers */);
4765 return result;
4768 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4769 vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
4771 int32x4_t result;
4772 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4773 : "=w"(result)
4774 : "0"(a), "w"(b), "w"(c)
4775 : /* No clobbers */);
4776 return result;
4779 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4780 vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
4782 int64x2_t result;
4783 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4784 : "=w"(result)
4785 : "0"(a), "w"(b), "w"(c)
4786 : /* No clobbers */);
4787 return result;
4790 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4791 vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4793 uint8x16_t result;
4794 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4795 : "=w"(result)
4796 : "0"(a), "w"(b), "w"(c)
4797 : /* No clobbers */);
4798 return result;
4801 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4802 vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4804 uint16x8_t result;
4805 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4806 : "=w"(result)
4807 : "0"(a), "w"(b), "w"(c)
4808 : /* No clobbers */);
4809 return result;
4812 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4813 vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4815 uint32x4_t result;
4816 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4817 : "=w"(result)
4818 : "0"(a), "w"(b), "w"(c)
4819 : /* No clobbers */);
4820 return result;
4823 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4824 vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
4826 uint64x2_t result;
4827 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4828 : "=w"(result)
4829 : "0"(a), "w"(b), "w"(c)
4830 : /* No clobbers */);
4831 return result;
4834 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4835 vcls_s8 (int8x8_t a)
4837 int8x8_t result;
4838 __asm__ ("cls %0.8b,%1.8b"
4839 : "=w"(result)
4840 : "w"(a)
4841 : /* No clobbers */);
4842 return result;
4845 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4846 vcls_s16 (int16x4_t a)
4848 int16x4_t result;
4849 __asm__ ("cls %0.4h,%1.4h"
4850 : "=w"(result)
4851 : "w"(a)
4852 : /* No clobbers */);
4853 return result;
4856 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4857 vcls_s32 (int32x2_t a)
4859 int32x2_t result;
4860 __asm__ ("cls %0.2s,%1.2s"
4861 : "=w"(result)
4862 : "w"(a)
4863 : /* No clobbers */);
4864 return result;
4867 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4868 vclsq_s8 (int8x16_t a)
4870 int8x16_t result;
4871 __asm__ ("cls %0.16b,%1.16b"
4872 : "=w"(result)
4873 : "w"(a)
4874 : /* No clobbers */);
4875 return result;
4878 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4879 vclsq_s16 (int16x8_t a)
4881 int16x8_t result;
4882 __asm__ ("cls %0.8h,%1.8h"
4883 : "=w"(result)
4884 : "w"(a)
4885 : /* No clobbers */);
4886 return result;
4889 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4890 vclsq_s32 (int32x4_t a)
4892 int32x4_t result;
4893 __asm__ ("cls %0.4s,%1.4s"
4894 : "=w"(result)
4895 : "w"(a)
4896 : /* No clobbers */);
4897 return result;
4900 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4901 vclz_s8 (int8x8_t a)
4903 int8x8_t result;
4904 __asm__ ("clz %0.8b,%1.8b"
4905 : "=w"(result)
4906 : "w"(a)
4907 : /* No clobbers */);
4908 return result;
4911 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4912 vclz_s16 (int16x4_t a)
4914 int16x4_t result;
4915 __asm__ ("clz %0.4h,%1.4h"
4916 : "=w"(result)
4917 : "w"(a)
4918 : /* No clobbers */);
4919 return result;
4922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4923 vclz_s32 (int32x2_t a)
4925 int32x2_t result;
4926 __asm__ ("clz %0.2s,%1.2s"
4927 : "=w"(result)
4928 : "w"(a)
4929 : /* No clobbers */);
4930 return result;
4933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4934 vclz_u8 (uint8x8_t a)
4936 uint8x8_t result;
4937 __asm__ ("clz %0.8b,%1.8b"
4938 : "=w"(result)
4939 : "w"(a)
4940 : /* No clobbers */);
4941 return result;
4944 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4945 vclz_u16 (uint16x4_t a)
4947 uint16x4_t result;
4948 __asm__ ("clz %0.4h,%1.4h"
4949 : "=w"(result)
4950 : "w"(a)
4951 : /* No clobbers */);
4952 return result;
4955 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4956 vclz_u32 (uint32x2_t a)
4958 uint32x2_t result;
4959 __asm__ ("clz %0.2s,%1.2s"
4960 : "=w"(result)
4961 : "w"(a)
4962 : /* No clobbers */);
4963 return result;
4966 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4967 vclzq_s8 (int8x16_t a)
4969 int8x16_t result;
4970 __asm__ ("clz %0.16b,%1.16b"
4971 : "=w"(result)
4972 : "w"(a)
4973 : /* No clobbers */);
4974 return result;
4977 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4978 vclzq_s16 (int16x8_t a)
4980 int16x8_t result;
4981 __asm__ ("clz %0.8h,%1.8h"
4982 : "=w"(result)
4983 : "w"(a)
4984 : /* No clobbers */);
4985 return result;
4988 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4989 vclzq_s32 (int32x4_t a)
4991 int32x4_t result;
4992 __asm__ ("clz %0.4s,%1.4s"
4993 : "=w"(result)
4994 : "w"(a)
4995 : /* No clobbers */);
4996 return result;
4999 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5000 vclzq_u8 (uint8x16_t a)
5002 uint8x16_t result;
5003 __asm__ ("clz %0.16b,%1.16b"
5004 : "=w"(result)
5005 : "w"(a)
5006 : /* No clobbers */);
5007 return result;
5010 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5011 vclzq_u16 (uint16x8_t a)
5013 uint16x8_t result;
5014 __asm__ ("clz %0.8h,%1.8h"
5015 : "=w"(result)
5016 : "w"(a)
5017 : /* No clobbers */);
5018 return result;
5021 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5022 vclzq_u32 (uint32x4_t a)
5024 uint32x4_t result;
5025 __asm__ ("clz %0.4s,%1.4s"
5026 : "=w"(result)
5027 : "w"(a)
5028 : /* No clobbers */);
5029 return result;
5032 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5033 vcnt_p8 (poly8x8_t a)
5035 poly8x8_t result;
5036 __asm__ ("cnt %0.8b,%1.8b"
5037 : "=w"(result)
5038 : "w"(a)
5039 : /* No clobbers */);
5040 return result;
5043 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5044 vcnt_s8 (int8x8_t a)
5046 int8x8_t result;
5047 __asm__ ("cnt %0.8b,%1.8b"
5048 : "=w"(result)
5049 : "w"(a)
5050 : /* No clobbers */);
5051 return result;
5054 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5055 vcnt_u8 (uint8x8_t a)
5057 uint8x8_t result;
5058 __asm__ ("cnt %0.8b,%1.8b"
5059 : "=w"(result)
5060 : "w"(a)
5061 : /* No clobbers */);
5062 return result;
5065 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5066 vcntq_p8 (poly8x16_t a)
5068 poly8x16_t result;
5069 __asm__ ("cnt %0.16b,%1.16b"
5070 : "=w"(result)
5071 : "w"(a)
5072 : /* No clobbers */);
5073 return result;
5076 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5077 vcntq_s8 (int8x16_t a)
5079 int8x16_t result;
5080 __asm__ ("cnt %0.16b,%1.16b"
5081 : "=w"(result)
5082 : "w"(a)
5083 : /* No clobbers */);
5084 return result;
5087 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5088 vcntq_u8 (uint8x16_t a)
5090 uint8x16_t result;
5091 __asm__ ("cnt %0.16b,%1.16b"
5092 : "=w"(result)
5093 : "w"(a)
5094 : /* No clobbers */);
5095 return result;
/* vcopyq_lane_<t>(a, b, c, d): insert lane d of vector c into lane b of
   vector a (INS).  Macros rather than functions because b and d must be
   compile-time immediates ("i" constraints).  */

#define vcopyq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t c_ = (c); \
       float64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p8(a, b, c, d) \
  __extension__ \
    ({ \
       poly8x16_t c_ = (c); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_p16(a, b, c, d) \
  __extension__ \
    ({ \
       poly16x8_t c_ = (c); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s8(a, b, c, d) \
  __extension__ \
    ({ \
       int8x16_t c_ = (c); \
       int8x16_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_s64(a, b, c, d) \
  __extension__ \
    ({ \
       int64x2_t c_ = (c); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u8(a, b, c, d) \
  __extension__ \
    ({ \
       uint8x16_t c_ = (c); \
       uint8x16_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%2], %3.b[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%2], %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%2], %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vcopyq_lane_u64(a, b, c, d) \
  __extension__ \
    ({ \
       uint64x2_t c_ = (c); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%2], %3.d[%4]" \
                : "=w"(result) \
                : "0"(a_), "i"(b), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
5254 /* vcvt_f16_f32 not supported */
5256 /* vcvt_f32_f16 not supported */
5258 /* vcvt_high_f16_f32 not supported */
5260 /* vcvt_high_f32_f16 not supported */
5262 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt_n_*: fixed-point <-> floating-point conversion with b fraction
   bits (SCVTF/UCVTF/FCVTZS/FCVTZU), 64-bit vector forms.  Macros because
   b must be a compile-time immediate.  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvtd_n_*: scalar 64-bit fixed-point <-> floating-point conversion with
   b fraction bits.  BUG FIX: the original declared 'result' with the
   *input* type (e.g. int64_t for vcvtd_n_f64_s64), so the statement
   expression yielded the wrong type; each 'result' now has the
   conversion's destination type, matching the intrinsic's name.  */

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvtq_n_*: fixed-point <-> floating-point conversion with b fraction
   bits (SCVTF/UCVTF/FCVTZS/FCVTZU), 128-bit vector forms.  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vcvts_n_*: scalar 32-bit fixed-point <-> floating-point conversion with
   b fraction bits.  BUG FIX: as with vcvtd_n_*, the original declared
   'result' with the *input* type (e.g. int32_t for vcvts_n_f32_s32);
   each 'result' now has the conversion's destination type.  */

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5504 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5505 vcvtx_f32_f64 (float64x2_t a)
5507 float32x2_t result;
5508 __asm__ ("fcvtxn %0.2s,%1.2d"
5509 : "=w"(result)
5510 : "w"(a)
5511 : /* No clobbers */);
5512 return result;
5515 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5516 vcvtx_high_f32_f64 (float64x2_t a)
5518 float32x4_t result;
5519 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5520 : "=w"(result)
5521 : "w"(a)
5522 : /* No clobbers */);
5523 return result;
5526 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5527 vcvtxd_f32_f64 (float64_t a)
5529 float32_t result;
5530 __asm__ ("fcvtxn %s0,%d1"
5531 : "=w"(result)
5532 : "w"(a)
5533 : /* No clobbers */);
5534 return result;
/* vdup_lane_<t>(a, b): broadcast lane b of vector a across a 64-bit
   vector (DUP; INS for the single-lane 64-bit types).  Macros because b
   must be a compile-time immediate.  */

#define vdup_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u64(a, b) \
  __extension__ \
    ({ \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5669 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5670 vdup_n_f32 (float32_t a)
5672 float32x2_t result;
5673 __asm__ ("dup %0.2s, %w1"
5674 : "=w"(result)
5675 : "r"(a)
5676 : /* No clobbers */);
5677 return result;
5680 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5681 vdup_n_p8 (uint32_t a)
5683 poly8x8_t result;
5684 __asm__ ("dup %0.8b,%w1"
5685 : "=w"(result)
5686 : "r"(a)
5687 : /* No clobbers */);
5688 return result;
5691 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5692 vdup_n_p16 (uint32_t a)
5694 poly16x4_t result;
5695 __asm__ ("dup %0.4h,%w1"
5696 : "=w"(result)
5697 : "r"(a)
5698 : /* No clobbers */);
5699 return result;
5702 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5703 vdup_n_s8 (int32_t a)
5705 int8x8_t result;
5706 __asm__ ("dup %0.8b,%w1"
5707 : "=w"(result)
5708 : "r"(a)
5709 : /* No clobbers */);
5710 return result;
5713 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5714 vdup_n_s16 (int32_t a)
5716 int16x4_t result;
5717 __asm__ ("dup %0.4h,%w1"
5718 : "=w"(result)
5719 : "r"(a)
5720 : /* No clobbers */);
5721 return result;
5724 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5725 vdup_n_s32 (int32_t a)
5727 int32x2_t result;
5728 __asm__ ("dup %0.2s,%w1"
5729 : "=w"(result)
5730 : "r"(a)
5731 : /* No clobbers */);
5732 return result;
5735 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5736 vdup_n_s64 (int64_t a)
5738 int64x1_t result;
5739 __asm__ ("ins %0.d[0],%x1"
5740 : "=w"(result)
5741 : "r"(a)
5742 : /* No clobbers */);
5743 return result;
5746 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5747 vdup_n_u8 (uint32_t a)
5749 uint8x8_t result;
5750 __asm__ ("dup %0.8b,%w1"
5751 : "=w"(result)
5752 : "r"(a)
5753 : /* No clobbers */);
5754 return result;
5757 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5758 vdup_n_u16 (uint32_t a)
5760 uint16x4_t result;
5761 __asm__ ("dup %0.4h,%w1"
5762 : "=w"(result)
5763 : "r"(a)
5764 : /* No clobbers */);
5765 return result;
5768 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5769 vdup_n_u32 (uint32_t a)
5771 uint32x2_t result;
5772 __asm__ ("dup %0.2s,%w1"
5773 : "=w"(result)
5774 : "r"(a)
5775 : /* No clobbers */);
5776 return result;
5779 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5780 vdup_n_u64 (uint64_t a)
5782 uint64x1_t result;
5783 __asm__ ("ins %0.d[0],%x1"
5784 : "=w"(result)
5785 : "r"(a)
5786 : /* No clobbers */);
5787 return result;
/* vdupd_lane_f64(a, b): extract lane b of a float64x2_t as a scalar
   float64_t (scalar DUP).  Macro because b must be a compile-time
   immediate.  */
#define vdupd_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       float64_t result; \
       __asm__ ("dup %d0, %1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vdupq_lane_<t>(a, b): broadcast lane b of a 64-bit vector across a
   128-bit vector (DUP).  Macros because b must be a compile-time
   immediate.  */

#define vdupq_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x1_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
5922 #define vdupq_lane_u32(a, b) \
5923 __extension__ \
5924 ({ \
5925 uint32x2_t a_ = (a); \
5926 uint32x4_t result; \
5927 __asm__ ("dup %0.4s,%1.s[%2]" \
5928 : "=w"(result) \
5929 : "w"(a_), "i"(b) \
5930 : /* No clobbers */); \
5931 result; \
5934 #define vdupq_lane_u64(a, b) \
5935 __extension__ \
5936 ({ \
5937 uint64x1_t a_ = (a); \
5938 uint64x2_t result; \
5939 __asm__ ("dup %0.2d,%1.d[%2]" \
5940 : "=w"(result) \
5941 : "w"(a_), "i"(b) \
5942 : /* No clobbers */); \
5943 result; \
5946 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5947 vdupq_n_f32 (float32_t a)
5949 float32x4_t result;
5950 __asm__ ("dup %0.4s, %w1"
5951 : "=w"(result)
5952 : "r"(a)
5953 : /* No clobbers */);
5954 return result;
5957 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5958 vdupq_n_f64 (float64_t a)
5960 float64x2_t result;
5961 __asm__ ("dup %0.2d, %x1"
5962 : "=w"(result)
5963 : "r"(a)
5964 : /* No clobbers */);
5965 return result;
5968 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5969 vdupq_n_p8 (uint32_t a)
5971 poly8x16_t result;
5972 __asm__ ("dup %0.16b,%w1"
5973 : "=w"(result)
5974 : "r"(a)
5975 : /* No clobbers */);
5976 return result;
5979 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
5980 vdupq_n_p16 (uint32_t a)
5982 poly16x8_t result;
5983 __asm__ ("dup %0.8h,%w1"
5984 : "=w"(result)
5985 : "r"(a)
5986 : /* No clobbers */);
5987 return result;
5990 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5991 vdupq_n_s8 (int32_t a)
5993 int8x16_t result;
5994 __asm__ ("dup %0.16b,%w1"
5995 : "=w"(result)
5996 : "r"(a)
5997 : /* No clobbers */);
5998 return result;
6001 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6002 vdupq_n_s16 (int32_t a)
6004 int16x8_t result;
6005 __asm__ ("dup %0.8h,%w1"
6006 : "=w"(result)
6007 : "r"(a)
6008 : /* No clobbers */);
6009 return result;
6012 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6013 vdupq_n_s32 (int32_t a)
6015 int32x4_t result;
6016 __asm__ ("dup %0.4s,%w1"
6017 : "=w"(result)
6018 : "r"(a)
6019 : /* No clobbers */);
6020 return result;
6023 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6024 vdupq_n_s64 (int64_t a)
6026 int64x2_t result;
6027 __asm__ ("dup %0.2d,%x1"
6028 : "=w"(result)
6029 : "r"(a)
6030 : /* No clobbers */);
6031 return result;
6034 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6035 vdupq_n_u8 (uint32_t a)
6037 uint8x16_t result;
6038 __asm__ ("dup %0.16b,%w1"
6039 : "=w"(result)
6040 : "r"(a)
6041 : /* No clobbers */);
6042 return result;
6045 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6046 vdupq_n_u16 (uint32_t a)
6048 uint16x8_t result;
6049 __asm__ ("dup %0.8h,%w1"
6050 : "=w"(result)
6051 : "r"(a)
6052 : /* No clobbers */);
6053 return result;
6056 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6057 vdupq_n_u32 (uint32_t a)
6059 uint32x4_t result;
6060 __asm__ ("dup %0.4s,%w1"
6061 : "=w"(result)
6062 : "r"(a)
6063 : /* No clobbers */);
6064 return result;
6067 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6068 vdupq_n_u64 (uint64_t a)
6070 uint64x2_t result;
6071 __asm__ ("dup %0.2d,%x1"
6072 : "=w"(result)
6073 : "r"(a)
6074 : /* No clobbers */);
6075 return result;
6078 #define vdups_lane_f32(a, b) \
6079 __extension__ \
6080 ({ \
6081 float32x4_t a_ = (a); \
6082 float32_t result; \
6083 __asm__ ("dup %s0, %1.s[%2]" \
6084 : "=w"(result) \
6085 : "w"(a_), "i"(b) \
6086 : /* No clobbers */); \
6087 result; \
/* vext_<type>: extract a 64-bit vector spanning the concatenation of
   A and B, starting C elements into A.  The byte offset passed to EXT
   is the element index scaled by the element size.  */
#define vext_f32(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       float32x2_t y_ = (b);                            \
       float32x2_t x_ = (a);                            \
       float32x2_t res_;                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_f64(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       float64x1_t y_ = (b);                            \
       float64x1_t x_ = (a);                            \
       float64x1_t res_;                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_p8(a, b, c)                                \
  __extension__                                         \
    ({                                                  \
       poly8x8_t y_ = (b);                              \
       poly8x8_t x_ = (a);                              \
       poly8x8_t res_;                                  \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"              \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_p16(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       poly16x4_t y_ = (b);                             \
       poly16x4_t x_ = (a);                             \
       poly16x4_t res_;                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_s8(a, b, c)                                \
  __extension__                                         \
    ({                                                  \
       int8x8_t y_ = (b);                               \
       int8x8_t x_ = (a);                               \
       int8x8_t res_;                                   \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"              \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_s16(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       int16x4_t y_ = (b);                              \
       int16x4_t x_ = (a);                              \
       int16x4_t res_;                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_s32(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       int32x2_t y_ = (b);                              \
       int32x2_t x_ = (a);                              \
       int32x2_t res_;                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_s64(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       int64x1_t y_ = (b);                              \
       int64x1_t x_ = (a);                              \
       int64x1_t res_;                                  \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_u8(a, b, c)                                \
  __extension__                                         \
    ({                                                  \
       uint8x8_t y_ = (b);                              \
       uint8x8_t x_ = (a);                              \
       uint8x8_t res_;                                  \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"              \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_u16(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       uint16x4_t y_ = (b);                             \
       uint16x4_t x_ = (a);                             \
       uint16x4_t res_;                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_u32(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       uint32x2_t y_ = (b);                             \
       uint32x2_t x_ = (a);                             \
       uint32x2_t res_;                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vext_u64(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       uint64x1_t y_ = (b);                             \
       uint64x1_t x_ = (a);                             \
       uint64x1_t res_;                                 \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"        \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })
/* vextq_<type>: 128-bit form of vext — extract a vector spanning the
   concatenation of A and B, starting C elements into A.  */
#define vextq_f32(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       float32x4_t y_ = (b);                            \
       float32x4_t x_ = (a);                            \
       float32x4_t res_;                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_f64(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       float64x2_t y_ = (b);                            \
       float64x2_t x_ = (a);                            \
       float64x2_t res_;                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_p8(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       poly8x16_t y_ = (b);                             \
       poly8x16_t x_ = (a);                             \
       poly8x16_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"       \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_p16(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       poly16x8_t y_ = (b);                             \
       poly16x8_t x_ = (a);                             \
       poly16x8_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_s8(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       int8x16_t y_ = (b);                              \
       int8x16_t x_ = (a);                              \
       int8x16_t res_;                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"       \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_s16(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       int16x8_t y_ = (b);                              \
       int16x8_t x_ = (a);                              \
       int16x8_t res_;                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_s32(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       int32x4_t y_ = (b);                              \
       int32x4_t x_ = (a);                              \
       int32x4_t res_;                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_s64(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       int64x2_t y_ = (b);                              \
       int64x2_t x_ = (a);                              \
       int64x2_t res_;                                  \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_u8(a, b, c)                               \
  __extension__                                         \
    ({                                                  \
       uint8x16_t y_ = (b);                             \
       uint8x16_t x_ = (a);                             \
       uint8x16_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"       \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_u16(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       uint16x8_t y_ = (b);                             \
       uint16x8_t x_ = (a);                             \
       uint16x8_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_u32(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       uint32x4_t y_ = (b);                             \
       uint32x4_t x_ = (a);                             \
       uint32x4_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vextq_u64(a, b, c)                              \
  __extension__                                         \
    ({                                                  \
       uint64x2_t y_ = (b);                             \
       uint64x2_t x_ = (a);                             \
       uint64x2_t res_;                                 \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"     \
                : "=w"(res_)                            \
                : "w"(x_), "w"(y_), "i"(c)              \
                : /* No clobbers */);                   \
       res_;                                            \
     })
6402 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6403 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6405 float32x2_t result;
6406 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
6407 : "=w"(result)
6408 : "0"(a), "w"(b), "w"(c)
6409 : /* No clobbers */);
6410 return result;
6413 #define vfma_lane_f32(a, b, c, d) \
6414 __extension__ \
6415 ({ \
6416 float32x2_t c_ = (c); \
6417 float32x2_t b_ = (b); \
6418 float32x2_t a_ = (a); \
6419 float32x2_t result; \
6420 __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
6421 : "=w"(result) \
6422 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6423 : /* No clobbers */); \
6424 result; \
6427 #define vfmad_lane_f64(a, b, c) \
6428 __extension__ \
6429 ({ \
6430 float64x2_t b_ = (b); \
6431 float64_t a_ = (a); \
6432 float64_t result; \
6433 __asm__ ("fmla %d0,%d1,%2.d[%3]" \
6434 : "=w"(result) \
6435 : "w"(a_), "w"(b_), "i"(c) \
6436 : /* No clobbers */); \
6437 result; \
6440 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6441 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6443 float32x4_t result;
6444 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
6445 : "=w"(result)
6446 : "0"(a), "w"(b), "w"(c)
6447 : /* No clobbers */);
6448 return result;
6451 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6452 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6454 float64x2_t result;
6455 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6456 : "=w"(result)
6457 : "0"(a), "w"(b), "w"(c)
6458 : /* No clobbers */);
6459 return result;
6462 #define vfmaq_lane_f32(a, b, c, d) \
6463 __extension__ \
6464 ({ \
6465 float32x4_t c_ = (c); \
6466 float32x4_t b_ = (b); \
6467 float32x4_t a_ = (a); \
6468 float32x4_t result; \
6469 __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
6470 : "=w"(result) \
6471 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6472 : /* No clobbers */); \
6473 result; \
6476 #define vfmaq_lane_f64(a, b, c, d) \
6477 __extension__ \
6478 ({ \
6479 float64x2_t c_ = (c); \
6480 float64x2_t b_ = (b); \
6481 float64x2_t a_ = (a); \
6482 float64x2_t result; \
6483 __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
6484 : "=w"(result) \
6485 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6486 : /* No clobbers */); \
6487 result; \
6490 #define vfmas_lane_f32(a, b, c) \
6491 __extension__ \
6492 ({ \
6493 float32x4_t b_ = (b); \
6494 float32_t a_ = (a); \
6495 float32_t result; \
6496 __asm__ ("fmla %s0,%s1,%2.s[%3]" \
6497 : "=w"(result) \
6498 : "w"(a_), "w"(b_), "i"(c) \
6499 : /* No clobbers */); \
6500 result; \
6503 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6504 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6506 float32x2_t result;
6507 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6508 : "=w"(result)
6509 : "0"(a), "w"(b), "w"(c)
6510 : /* No clobbers */);
6511 return result;
6514 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6515 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6517 float32x4_t result;
6518 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6519 : "=w"(result)
6520 : "0"(a), "w"(b), "w"(c)
6521 : /* No clobbers */);
6522 return result;
6525 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6526 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
6528 float64x2_t result;
6529 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6530 : "=w"(result)
6531 : "0"(a), "w"(b), "w"(c)
6532 : /* No clobbers */);
6533 return result;
6536 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6537 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6539 float32x2_t result;
6540 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6541 : "=w"(result)
6542 : "0"(a), "w"(b), "w"(c)
6543 : /* No clobbers */);
6544 return result;
6547 #define vfmsd_lane_f64(a, b, c) \
6548 __extension__ \
6549 ({ \
6550 float64x2_t b_ = (b); \
6551 float64_t a_ = (a); \
6552 float64_t result; \
6553 __asm__ ("fmls %d0,%d1,%2.d[%3]" \
6554 : "=w"(result) \
6555 : "w"(a_), "w"(b_), "i"(c) \
6556 : /* No clobbers */); \
6557 result; \
6560 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6561 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6563 float32x4_t result;
6564 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6565 : "=w"(result)
6566 : "0"(a), "w"(b), "w"(c)
6567 : /* No clobbers */);
6568 return result;
6571 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6572 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6574 float64x2_t result;
6575 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6576 : "=w"(result)
6577 : "0"(a), "w"(b), "w"(c)
6578 : /* No clobbers */);
6579 return result;
6582 #define vfmss_lane_f32(a, b, c) \
6583 __extension__ \
6584 ({ \
6585 float32x4_t b_ = (b); \
6586 float32_t a_ = (a); \
6587 float32_t result; \
6588 __asm__ ("fmls %s0,%s1,%2.s[%3]" \
6589 : "=w"(result) \
6590 : "w"(a_), "w"(b_), "i"(c) \
6591 : /* No clobbers */); \
6592 result; \
6595 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6596 vget_high_f32 (float32x4_t a)
6598 float32x2_t result;
6599 __asm__ ("ins %0.d[0], %1.d[1]"
6600 : "=w"(result)
6601 : "w"(a)
6602 : /* No clobbers */);
6603 return result;
6606 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6607 vget_high_f64 (float64x2_t a)
6609 float64x1_t result;
6610 __asm__ ("ins %0.d[0], %1.d[1]"
6611 : "=w"(result)
6612 : "w"(a)
6613 : /* No clobbers */);
6614 return result;
6617 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6618 vget_high_p8 (poly8x16_t a)
6620 poly8x8_t result;
6621 __asm__ ("ins %0.d[0], %1.d[1]"
6622 : "=w"(result)
6623 : "w"(a)
6624 : /* No clobbers */);
6625 return result;
6628 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6629 vget_high_p16 (poly16x8_t a)
6631 poly16x4_t result;
6632 __asm__ ("ins %0.d[0], %1.d[1]"
6633 : "=w"(result)
6634 : "w"(a)
6635 : /* No clobbers */);
6636 return result;
6639 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6640 vget_high_s8 (int8x16_t a)
6642 int8x8_t result;
6643 __asm__ ("ins %0.d[0], %1.d[1]"
6644 : "=w"(result)
6645 : "w"(a)
6646 : /* No clobbers */);
6647 return result;
6650 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6651 vget_high_s16 (int16x8_t a)
6653 int16x4_t result;
6654 __asm__ ("ins %0.d[0], %1.d[1]"
6655 : "=w"(result)
6656 : "w"(a)
6657 : /* No clobbers */);
6658 return result;
6661 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6662 vget_high_s32 (int32x4_t a)
6664 int32x2_t result;
6665 __asm__ ("ins %0.d[0], %1.d[1]"
6666 : "=w"(result)
6667 : "w"(a)
6668 : /* No clobbers */);
6669 return result;
6672 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6673 vget_high_s64 (int64x2_t a)
6675 int64x1_t result;
6676 __asm__ ("ins %0.d[0], %1.d[1]"
6677 : "=w"(result)
6678 : "w"(a)
6679 : /* No clobbers */);
6680 return result;
6683 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6684 vget_high_u8 (uint8x16_t a)
6686 uint8x8_t result;
6687 __asm__ ("ins %0.d[0], %1.d[1]"
6688 : "=w"(result)
6689 : "w"(a)
6690 : /* No clobbers */);
6691 return result;
6694 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6695 vget_high_u16 (uint16x8_t a)
6697 uint16x4_t result;
6698 __asm__ ("ins %0.d[0], %1.d[1]"
6699 : "=w"(result)
6700 : "w"(a)
6701 : /* No clobbers */);
6702 return result;
6705 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6706 vget_high_u32 (uint32x4_t a)
6708 uint32x2_t result;
6709 __asm__ ("ins %0.d[0], %1.d[1]"
6710 : "=w"(result)
6711 : "w"(a)
6712 : /* No clobbers */);
6713 return result;
6716 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6717 vget_high_u64 (uint64x2_t a)
6719 uint64x1_t result;
6720 __asm__ ("ins %0.d[0], %1.d[1]"
6721 : "=w"(result)
6722 : "w"(a)
6723 : /* No clobbers */);
6724 return result;
/* Extract lane B of A as a float64_t, moved out through a general
   register (UMOV transfers the raw 64-bit pattern).  */
#define vget_lane_f64(a, b)                     \
  __extension__                                 \
    ({                                          \
       float64x1_t x_ = (a);                    \
       float64_t res_;                          \
       __asm__ ("umov %x0, %1.d[%2]"            \
                : "=r"(res_)                    \
                : "w"(x_), "i"(b)               \
                : /* No clobbers */);           \
       res_;                                    \
     })
6739 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6740 vget_low_f32 (float32x4_t a)
6742 float32x2_t result;
6743 __asm__ ("ins %0.d[0], %1.d[0]"
6744 : "=w"(result)
6745 : "w"(a)
6746 : /* No clobbers */);
6747 return result;
6750 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6751 vget_low_f64 (float64x2_t a)
6753 float64x1_t result;
6754 __asm__ ("ins %0.d[0], %1.d[0]"
6755 : "=w"(result)
6756 : "w"(a)
6757 : /* No clobbers */);
6758 return result;
6761 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6762 vget_low_p8 (poly8x16_t a)
6764 poly8x8_t result;
6765 __asm__ ("ins %0.d[0], %1.d[0]"
6766 : "=w"(result)
6767 : "w"(a)
6768 : /* No clobbers */);
6769 return result;
6772 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6773 vget_low_p16 (poly16x8_t a)
6775 poly16x4_t result;
6776 __asm__ ("ins %0.d[0], %1.d[0]"
6777 : "=w"(result)
6778 : "w"(a)
6779 : /* No clobbers */);
6780 return result;
6783 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6784 vget_low_s8 (int8x16_t a)
6786 int8x8_t result;
6787 __asm__ ("ins %0.d[0], %1.d[0]"
6788 : "=w"(result)
6789 : "w"(a)
6790 : /* No clobbers */);
6791 return result;
6794 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6795 vget_low_s16 (int16x8_t a)
6797 int16x4_t result;
6798 __asm__ ("ins %0.d[0], %1.d[0]"
6799 : "=w"(result)
6800 : "w"(a)
6801 : /* No clobbers */);
6802 return result;
6805 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6806 vget_low_s32 (int32x4_t a)
6808 int32x2_t result;
6809 __asm__ ("ins %0.d[0], %1.d[0]"
6810 : "=w"(result)
6811 : "w"(a)
6812 : /* No clobbers */);
6813 return result;
6816 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6817 vget_low_s64 (int64x2_t a)
6819 int64x1_t result;
6820 __asm__ ("ins %0.d[0], %1.d[0]"
6821 : "=w"(result)
6822 : "w"(a)
6823 : /* No clobbers */);
6824 return result;
6827 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6828 vget_low_u8 (uint8x16_t a)
6830 uint8x8_t result;
6831 __asm__ ("ins %0.d[0], %1.d[0]"
6832 : "=w"(result)
6833 : "w"(a)
6834 : /* No clobbers */);
6835 return result;
6838 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6839 vget_low_u16 (uint16x8_t a)
6841 uint16x4_t result;
6842 __asm__ ("ins %0.d[0], %1.d[0]"
6843 : "=w"(result)
6844 : "w"(a)
6845 : /* No clobbers */);
6846 return result;
6849 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6850 vget_low_u32 (uint32x4_t a)
6852 uint32x2_t result;
6853 __asm__ ("ins %0.d[0], %1.d[0]"
6854 : "=w"(result)
6855 : "w"(a)
6856 : /* No clobbers */);
6857 return result;
6860 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6861 vget_low_u64 (uint64x2_t a)
6863 uint64x1_t result;
6864 __asm__ ("ins %0.d[0], %1.d[0]"
6865 : "=w"(result)
6866 : "w"(a)
6867 : /* No clobbers */);
6868 return result;
6871 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6872 vhsub_s8 (int8x8_t a, int8x8_t b)
6874 int8x8_t result;
6875 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6876 : "=w"(result)
6877 : "w"(a), "w"(b)
6878 : /* No clobbers */);
6879 return result;
6882 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6883 vhsub_s16 (int16x4_t a, int16x4_t b)
6885 int16x4_t result;
6886 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6887 : "=w"(result)
6888 : "w"(a), "w"(b)
6889 : /* No clobbers */);
6890 return result;
6893 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6894 vhsub_s32 (int32x2_t a, int32x2_t b)
6896 int32x2_t result;
6897 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6898 : "=w"(result)
6899 : "w"(a), "w"(b)
6900 : /* No clobbers */);
6901 return result;
6904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6905 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6907 uint8x8_t result;
6908 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6909 : "=w"(result)
6910 : "w"(a), "w"(b)
6911 : /* No clobbers */);
6912 return result;
6915 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6916 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6918 uint16x4_t result;
6919 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6920 : "=w"(result)
6921 : "w"(a), "w"(b)
6922 : /* No clobbers */);
6923 return result;
6926 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6927 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6929 uint32x2_t result;
6930 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6931 : "=w"(result)
6932 : "w"(a), "w"(b)
6933 : /* No clobbers */);
6934 return result;
6937 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6938 vhsubq_s8 (int8x16_t a, int8x16_t b)
6940 int8x16_t result;
6941 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6942 : "=w"(result)
6943 : "w"(a), "w"(b)
6944 : /* No clobbers */);
6945 return result;
6948 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6949 vhsubq_s16 (int16x8_t a, int16x8_t b)
6951 int16x8_t result;
6952 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
6953 : "=w"(result)
6954 : "w"(a), "w"(b)
6955 : /* No clobbers */);
6956 return result;
6959 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6960 vhsubq_s32 (int32x4_t a, int32x4_t b)
6962 int32x4_t result;
6963 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
6964 : "=w"(result)
6965 : "w"(a), "w"(b)
6966 : /* No clobbers */);
6967 return result;
6970 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6971 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
6973 uint8x16_t result;
6974 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
6975 : "=w"(result)
6976 : "w"(a), "w"(b)
6977 : /* No clobbers */);
6978 return result;
6981 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6982 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
6984 uint16x8_t result;
6985 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
6986 : "=w"(result)
6987 : "w"(a), "w"(b)
6988 : /* No clobbers */);
6989 return result;
6992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6993 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
6995 uint32x4_t result;
6996 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
6997 : "=w"(result)
6998 : "w"(a), "w"(b)
6999 : /* No clobbers */);
7000 return result;
7003 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7004 vld1_dup_f32 (const float32_t * a)
7006 float32x2_t result;
7007 __asm__ ("ld1r {%0.2s}, %1"
7008 : "=w"(result)
7009 : "Utv"(*a)
7010 : /* No clobbers */);
7011 return result;
7014 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
7015 vld1_dup_f64 (const float64_t * a)
7017 float64x1_t result;
7018 __asm__ ("ld1r {%0.1d}, %1"
7019 : "=w"(result)
7020 : "Utv"(*a)
7021 : /* No clobbers */);
7022 return result;
7025 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7026 vld1_dup_p8 (const poly8_t * a)
7028 poly8x8_t result;
7029 __asm__ ("ld1r {%0.8b}, %1"
7030 : "=w"(result)
7031 : "Utv"(*a)
7032 : /* No clobbers */);
7033 return result;
7036 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7037 vld1_dup_p16 (const poly16_t * a)
7039 poly16x4_t result;
7040 __asm__ ("ld1r {%0.4h}, %1"
7041 : "=w"(result)
7042 : "Utv"(*a)
7043 : /* No clobbers */);
7044 return result;
7047 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7048 vld1_dup_s8 (const int8_t * a)
7050 int8x8_t result;
7051 __asm__ ("ld1r {%0.8b}, %1"
7052 : "=w"(result)
7053 : "Utv"(*a)
7054 : /* No clobbers */);
7055 return result;
7058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7059 vld1_dup_s16 (const int16_t * a)
7061 int16x4_t result;
7062 __asm__ ("ld1r {%0.4h}, %1"
7063 : "=w"(result)
7064 : "Utv"(*a)
7065 : /* No clobbers */);
7066 return result;
7069 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7070 vld1_dup_s32 (const int32_t * a)
7072 int32x2_t result;
7073 __asm__ ("ld1r {%0.2s}, %1"
7074 : "=w"(result)
7075 : "Utv"(*a)
7076 : /* No clobbers */);
7077 return result;
7080 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7081 vld1_dup_s64 (const int64_t * a)
7083 int64x1_t result;
7084 __asm__ ("ld1r {%0.1d}, %1"
7085 : "=w"(result)
7086 : "Utv"(*a)
7087 : /* No clobbers */);
7088 return result;
7091 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7092 vld1_dup_u8 (const uint8_t * a)
7094 uint8x8_t result;
7095 __asm__ ("ld1r {%0.8b}, %1"
7096 : "=w"(result)
7097 : "Utv"(*a)
7098 : /* No clobbers */);
7099 return result;
7102 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7103 vld1_dup_u16 (const uint16_t * a)
7105 uint16x4_t result;
7106 __asm__ ("ld1r {%0.4h}, %1"
7107 : "=w"(result)
7108 : "Utv"(*a)
7109 : /* No clobbers */);
7110 return result;
7113 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7114 vld1_dup_u32 (const uint32_t * a)
7116 uint32x2_t result;
7117 __asm__ ("ld1r {%0.2s}, %1"
7118 : "=w"(result)
7119 : "Utv"(*a)
7120 : /* No clobbers */);
7121 return result;
7124 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7125 vld1_dup_u64 (const uint64_t * a)
7127 uint64x1_t result;
7128 __asm__ ("ld1r {%0.1d}, %1"
7129 : "=w"(result)
7130 : "Utv"(*a)
7131 : /* No clobbers */);
7132 return result;
/* vld1_lane_<type>: load one element from *A into lane C of vector B,
   leaving the other lanes unchanged (B is tied to the output with the
   "0" constraint).  Macros because the lane index is an immediate.  */
#define vld1_lane_f32(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       float32x2_t y_ = (b);                            \
       const float32_t * x_ = (a);                      \
       float32x2_t res_;                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_f64(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       float64x1_t y_ = (b);                            \
       const float64_t * x_ = (a);                      \
       float64x1_t res_;                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_p8(a, b, c)                           \
  __extension__                                         \
    ({                                                  \
       poly8x8_t y_ = (b);                              \
       const poly8_t * x_ = (a);                        \
       poly8x8_t res_;                                  \
       __asm__ ("ld1 {%0.b}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_p16(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       poly16x4_t y_ = (b);                             \
       const poly16_t * x_ = (a);                       \
       poly16x4_t res_;                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_s8(a, b, c)                           \
  __extension__                                         \
    ({                                                  \
       int8x8_t y_ = (b);                               \
       const int8_t * x_ = (a);                         \
       int8x8_t res_;                                   \
       __asm__ ("ld1 {%0.b}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_s16(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       int16x4_t y_ = (b);                              \
       const int16_t * x_ = (a);                        \
       int16x4_t res_;                                  \
       __asm__ ("ld1 {%0.h}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_s32(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       int32x2_t y_ = (b);                              \
       const int32_t * x_ = (a);                        \
       int32x2_t res_;                                  \
       __asm__ ("ld1 {%0.s}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_s64(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       int64x1_t y_ = (b);                              \
       const int64_t * x_ = (a);                        \
       int64x1_t res_;                                  \
       __asm__ ("ld1 {%0.d}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_u8(a, b, c)                           \
  __extension__                                         \
    ({                                                  \
       uint8x8_t y_ = (b);                              \
       const uint8_t * x_ = (a);                        \
       uint8x8_t res_;                                  \
       __asm__ ("ld1 {%0.b}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_u16(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       uint16x4_t y_ = (b);                             \
       const uint16_t * x_ = (a);                       \
       uint16x4_t res_;                                 \
       __asm__ ("ld1 {%0.h}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })

#define vld1_lane_u32(a, b, c)                          \
  __extension__                                         \
    ({                                                  \
       uint32x2_t y_ = (b);                             \
       const uint32_t * x_ = (a);                       \
       uint32x2_t res_;                                 \
       __asm__ ("ld1 {%0.s}[%1], %2"                    \
                : "=w"(res_)                            \
                : "i" (c), "Utv"(*x_), "0"(y_)          \
                : /* No clobbers */);                   \
       res_;                                            \
     })
7278 #define vld1_lane_u64(a, b, c) \
7279 __extension__ \
7280 ({ \
7281 uint64x1_t b_ = (b); \
7282 const uint64_t * a_ = (a); \
7283 uint64x1_t result; \
7284 __asm__ ("ld1 {%0.d}[%1], %2" \
7285 : "=w"(result) \
7286 : "i" (c), "Utv"(*a_), "0"(b_) \
7287 : /* No clobbers */); \
7288 result; \
7291 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7292 vld1q_dup_f32 (const float32_t * a)
7294 float32x4_t result;
7295 __asm__ ("ld1r {%0.4s}, %1"
7296 : "=w"(result)
7297 : "Utv"(*a)
7298 : /* No clobbers */);
7299 return result;
7302 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7303 vld1q_dup_f64 (const float64_t * a)
7305 float64x2_t result;
7306 __asm__ ("ld1r {%0.2d}, %1"
7307 : "=w"(result)
7308 : "Utv"(*a)
7309 : /* No clobbers */);
7310 return result;
7313 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7314 vld1q_dup_p8 (const poly8_t * a)
7316 poly8x16_t result;
7317 __asm__ ("ld1r {%0.16b}, %1"
7318 : "=w"(result)
7319 : "Utv"(*a)
7320 : /* No clobbers */);
7321 return result;
7324 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7325 vld1q_dup_p16 (const poly16_t * a)
7327 poly16x8_t result;
7328 __asm__ ("ld1r {%0.8h}, %1"
7329 : "=w"(result)
7330 : "Utv"(*a)
7331 : /* No clobbers */);
7332 return result;
7335 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7336 vld1q_dup_s8 (const int8_t * a)
7338 int8x16_t result;
7339 __asm__ ("ld1r {%0.16b}, %1"
7340 : "=w"(result)
7341 : "Utv"(*a)
7342 : /* No clobbers */);
7343 return result;
7346 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7347 vld1q_dup_s16 (const int16_t * a)
7349 int16x8_t result;
7350 __asm__ ("ld1r {%0.8h}, %1"
7351 : "=w"(result)
7352 : "Utv"(*a)
7353 : /* No clobbers */);
7354 return result;
7357 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7358 vld1q_dup_s32 (const int32_t * a)
7360 int32x4_t result;
7361 __asm__ ("ld1r {%0.4s}, %1"
7362 : "=w"(result)
7363 : "Utv"(*a)
7364 : /* No clobbers */);
7365 return result;
7368 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7369 vld1q_dup_s64 (const int64_t * a)
7371 int64x2_t result;
7372 __asm__ ("ld1r {%0.2d}, %1"
7373 : "=w"(result)
7374 : "Utv"(*a)
7375 : /* No clobbers */);
7376 return result;
7379 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7380 vld1q_dup_u8 (const uint8_t * a)
7382 uint8x16_t result;
7383 __asm__ ("ld1r {%0.16b}, %1"
7384 : "=w"(result)
7385 : "Utv"(*a)
7386 : /* No clobbers */);
7387 return result;
7390 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7391 vld1q_dup_u16 (const uint16_t * a)
7393 uint16x8_t result;
7394 __asm__ ("ld1r {%0.8h}, %1"
7395 : "=w"(result)
7396 : "Utv"(*a)
7397 : /* No clobbers */);
7398 return result;
7401 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7402 vld1q_dup_u32 (const uint32_t * a)
7404 uint32x4_t result;
7405 __asm__ ("ld1r {%0.4s}, %1"
7406 : "=w"(result)
7407 : "Utv"(*a)
7408 : /* No clobbers */);
7409 return result;
7412 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7413 vld1q_dup_u64 (const uint64_t * a)
7415 uint64x2_t result;
7416 __asm__ ("ld1r {%0.2d}, %1"
7417 : "=w"(result)
7418 : "Utv"(*a)
7419 : /* No clobbers */);
7420 return result;
/* vld1q_lane_<type> (a, b, c): load one element from *A into lane C of
   128-bit vector B, leaving the other lanes unchanged (LD1 to a lane).
   C must be a compile-time constant ("i" constraint).  */

#define vld1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       const float32_t * a_ = (a); \
       float32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       const float64_t * a_ = (a); \
       float64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       const poly8_t * a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       const poly16_t * a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       const int8_t * a_ = (a); \
       int8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       const int16_t * a_ = (a); \
       int16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       const int32_t * a_ = (a); \
       int32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       const int64_t * a_ = (a); \
       int64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       const uint8_t * a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ld1 {%0.b}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       const uint16_t * a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ld1 {%0.h}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       const uint32_t * a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ld1 {%0.s}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })

#define vld1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       const uint64_t * a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ld1 {%0.d}[%1], %2" \
                : "=w"(result) \
                : "i"(c), "Utv"(*a_), "0"(b_) \
                : /* No clobbers */); \
       result; \
     })
/* vmla_lane_<type> (a, b, c, d): multiply-accumulate by a lane,
   result = a + b * c[d], on 64-bit vectors.  D must be a compile-time
   constant.  The f32 variant is deliberately emitted as a separate
   FMUL + FADD pair (unfused, matching vmla semantics); t1 is a scratch
   register for the product.  */

#define vmla_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t c_ = (c); \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       float32x2_t t1; \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vmla_laneq_<type> (a, b, c, d): multiply-accumulate by a lane of a
   128-bit vector C, result = a + b * c[d], on 64-bit A/B.  D must be a
   compile-time constant.  */

#define vmla_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmla_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7706 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7707 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7709 float32x2_t result;
7710 float32x2_t t1;
7711 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
7712 : "=w"(result), "=w"(t1)
7713 : "0"(a), "w"(b), "w"(c)
7714 : /* No clobbers */);
7715 return result;
7718 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7719 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7721 int16x4_t result;
7722 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7723 : "=w"(result)
7724 : "0"(a), "w"(b), "w"(c)
7725 : /* No clobbers */);
7726 return result;
7729 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7730 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7732 int32x2_t result;
7733 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7734 : "=w"(result)
7735 : "0"(a), "w"(b), "w"(c)
7736 : /* No clobbers */);
7737 return result;
7740 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7741 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7743 uint16x4_t result;
7744 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7745 : "=w"(result)
7746 : "0"(a), "w"(b), "w"(c)
7747 : /* No clobbers */);
7748 return result;
7751 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7752 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7754 uint32x2_t result;
7755 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7756 : "=w"(result)
7757 : "0"(a), "w"(b), "w"(c)
7758 : /* No clobbers */);
7759 return result;
7762 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7763 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7765 int8x8_t result;
7766 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7767 : "=w"(result)
7768 : "0"(a), "w"(b), "w"(c)
7769 : /* No clobbers */);
7770 return result;
7773 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7774 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7776 int16x4_t result;
7777 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7778 : "=w"(result)
7779 : "0"(a), "w"(b), "w"(c)
7780 : /* No clobbers */);
7781 return result;
7784 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7785 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7787 int32x2_t result;
7788 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7789 : "=w"(result)
7790 : "0"(a), "w"(b), "w"(c)
7791 : /* No clobbers */);
7792 return result;
7795 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7796 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7798 uint8x8_t result;
7799 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7800 : "=w"(result)
7801 : "0"(a), "w"(b), "w"(c)
7802 : /* No clobbers */);
7803 return result;
7806 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7807 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7809 uint16x4_t result;
7810 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7811 : "=w"(result)
7812 : "0"(a), "w"(b), "w"(c)
7813 : /* No clobbers */);
7814 return result;
7817 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7818 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7820 uint32x2_t result;
7821 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7822 : "=w"(result)
7823 : "0"(a), "w"(b), "w"(c)
7824 : /* No clobbers */);
7825 return result;
/* vmlal_high_lane_<type> (a, b, c, d): widening multiply-accumulate of
   the HIGH half of B by lane D of C (SMLAL2/UMLAL2):
   result = a + high(b) * c[d].  D must be a compile-time constant.  */

#define vmlal_high_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vmlal_high_laneq_<type> (a, b, c, d): widening multiply-accumulate of
   the HIGH half of B by lane D of the 128-bit vector C (SMLAL2/UMLAL2).
   D must be a compile-time constant.  */

#define vmlal_high_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_high_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
7940 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7941 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7943 int32x4_t result;
7944 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
7945 : "=w"(result)
7946 : "0"(a), "w"(b), "w"(c)
7947 : /* No clobbers */);
7948 return result;
7951 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7952 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7954 int64x2_t result;
7955 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
7956 : "=w"(result)
7957 : "0"(a), "w"(b), "w"(c)
7958 : /* No clobbers */);
7959 return result;
7962 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7963 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7965 uint32x4_t result;
7966 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
7967 : "=w"(result)
7968 : "0"(a), "w"(b), "w"(c)
7969 : /* No clobbers */);
7970 return result;
7973 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7974 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7976 uint64x2_t result;
7977 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
7978 : "=w"(result)
7979 : "0"(a), "w"(b), "w"(c)
7980 : /* No clobbers */);
7981 return result;
7984 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7985 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7987 int16x8_t result;
7988 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
7989 : "=w"(result)
7990 : "0"(a), "w"(b), "w"(c)
7991 : /* No clobbers */);
7992 return result;
7995 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7996 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7998 int32x4_t result;
7999 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
8000 : "=w"(result)
8001 : "0"(a), "w"(b), "w"(c)
8002 : /* No clobbers */);
8003 return result;
8006 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8007 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
8009 int64x2_t result;
8010 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
8011 : "=w"(result)
8012 : "0"(a), "w"(b), "w"(c)
8013 : /* No clobbers */);
8014 return result;
8017 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8018 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
8020 uint16x8_t result;
8021 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
8022 : "=w"(result)
8023 : "0"(a), "w"(b), "w"(c)
8024 : /* No clobbers */);
8025 return result;
8028 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8029 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
8031 uint32x4_t result;
8032 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
8033 : "=w"(result)
8034 : "0"(a), "w"(b), "w"(c)
8035 : /* No clobbers */);
8036 return result;
8039 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8040 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
8042 uint64x2_t result;
8043 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
8044 : "=w"(result)
8045 : "0"(a), "w"(b), "w"(c)
8046 : /* No clobbers */);
8047 return result;
/* vmlal_lane_<type> (a, b, c, d): widening multiply-accumulate by lane
   D of C (SMLAL/UMLAL): result = a + b * c[d].  D must be a
   compile-time constant.  */

#define vmlal_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x4_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x2_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x4_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x2_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vmlal_laneq_<type> (a, b, c, d): widening multiply-accumulate by lane
   D of a 128-bit vector C (SMLAL/UMLAL).  D must be a compile-time
   constant.  */

#define vmlal_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x2_t b_ = (b); \
       int64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlal_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x2_t b_ = (b); \
       uint64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8162 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8163 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
8165 int32x4_t result;
8166 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
8167 : "=w"(result)
8168 : "0"(a), "w"(b), "w"(c)
8169 : /* No clobbers */);
8170 return result;
8173 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8174 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
8176 int64x2_t result;
8177 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
8178 : "=w"(result)
8179 : "0"(a), "w"(b), "w"(c)
8180 : /* No clobbers */);
8181 return result;
8184 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8185 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
8187 uint32x4_t result;
8188 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
8189 : "=w"(result)
8190 : "0"(a), "w"(b), "w"(c)
8191 : /* No clobbers */);
8192 return result;
8195 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8196 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
8198 uint64x2_t result;
8199 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
8200 : "=w"(result)
8201 : "0"(a), "w"(b), "w"(c)
8202 : /* No clobbers */);
8203 return result;
8206 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8207 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
8209 int16x8_t result;
8210 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
8211 : "=w"(result)
8212 : "0"(a), "w"(b), "w"(c)
8213 : /* No clobbers */);
8214 return result;
8217 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8218 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
8220 int32x4_t result;
8221 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
8222 : "=w"(result)
8223 : "0"(a), "w"(b), "w"(c)
8224 : /* No clobbers */);
8225 return result;
8228 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8229 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
8231 int64x2_t result;
8232 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
8233 : "=w"(result)
8234 : "0"(a), "w"(b), "w"(c)
8235 : /* No clobbers */);
8236 return result;
8239 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8240 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
8242 uint16x8_t result;
8243 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
8244 : "=w"(result)
8245 : "0"(a), "w"(b), "w"(c)
8246 : /* No clobbers */);
8247 return result;
8250 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8251 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
8253 uint32x4_t result;
8254 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
8255 : "=w"(result)
8256 : "0"(a), "w"(b), "w"(c)
8257 : /* No clobbers */);
8258 return result;
8261 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8262 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
8264 uint64x2_t result;
8265 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
8266 : "=w"(result)
8267 : "0"(a), "w"(b), "w"(c)
8268 : /* No clobbers */);
8269 return result;
/* vmlaq_lane_<type> (a, b, c, d): multiply-accumulate by a lane on
   128-bit vectors, result = a + b * c[d].  D must be a compile-time
   constant.  The f32 variant is an unfused FMUL + FADD pair; t1 is
   scratch for the product.  */

#define vmlaq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t c_ = (c); \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t result; \
       float32x4_t t1; \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_lane_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
/* vmlaq_laneq_<type> (a, b, c, d): multiply-accumulate by lane D of the
   128-bit vector C, on 128-bit A/B.  D must be a compile-time
   constant.  */

#define vmlaq_laneq_s16(a, b, c, d) \
  __extension__ \
    ({ \
       int16x8_t c_ = (c); \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_s32(a, b, c, d) \
  __extension__ \
    ({ \
       int32x4_t c_ = (c); \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_u16(a, b, c, d) \
  __extension__ \
    ({ \
       uint16x8_t c_ = (c); \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })

#define vmlaq_laneq_u32(a, b, c, d) \
  __extension__ \
    ({ \
       uint32x4_t c_ = (c); \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
                : /* No clobbers */); \
       result; \
     })
8399 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8400 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
8402 float32x4_t result;
8403 float32x4_t t1;
8404 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
8405 : "=w"(result), "=w"(t1)
8406 : "0"(a), "w"(b), "w"(c)
8407 : /* No clobbers */);
8408 return result;
8411 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8412 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
8414 float64x2_t result;
8415 float64x2_t t1;
8416 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
8417 : "=w"(result), "=w"(t1)
8418 : "0"(a), "w"(b), "w"(c)
8419 : /* No clobbers */);
8420 return result;
8423 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8424 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
8426 int16x8_t result;
8427 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8428 : "=w"(result)
8429 : "0"(a), "w"(b), "w"(c)
8430 : /* No clobbers */);
8431 return result;
8434 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8435 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8437 int32x4_t result;
8438 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8439 : "=w"(result)
8440 : "0"(a), "w"(b), "w"(c)
8441 : /* No clobbers */);
8442 return result;
8445 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8446 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8448 uint16x8_t result;
8449 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8450 : "=w"(result)
8451 : "0"(a), "w"(b), "w"(c)
8452 : /* No clobbers */);
8453 return result;
8456 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8457 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8459 uint32x4_t result;
8460 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8461 : "=w"(result)
8462 : "0"(a), "w"(b), "w"(c)
8463 : /* No clobbers */);
8464 return result;
/* vmlaq_<s8|s16|s32|u8|u16|u32>: vector-by-vector multiply-accumulate on
   128-bit registers, a + b * c element-wise (MLA Vd.<T>, Vn.<T>, Vm.<T>).
   "0"(a) makes the accumulator live in the destination register, matching
   MLA's read-modify-write semantics.  */
8467 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8468 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8470 int8x16_t result;
8471 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8472 : "=w"(result)
8473 : "0"(a), "w"(b), "w"(c)
8474 : /* No clobbers */);
8475 return result;
8478 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8479 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8481 int16x8_t result;
8482 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8483 : "=w"(result)
8484 : "0"(a), "w"(b), "w"(c)
8485 : /* No clobbers */);
8486 return result;
8489 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8490 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8492 int32x4_t result;
8493 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8494 : "=w"(result)
8495 : "0"(a), "w"(b), "w"(c)
8496 : /* No clobbers */);
8497 return result;
/* Unsigned variants reuse the same opcodes; MLA does not widen, so
   signedness does not affect the low-order result bits.  */
8500 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8501 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8503 uint8x16_t result;
8504 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8505 : "=w"(result)
8506 : "0"(a), "w"(b), "w"(c)
8507 : /* No clobbers */);
8508 return result;
8511 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8512 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8514 uint16x8_t result;
8515 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8516 : "=w"(result)
8517 : "0"(a), "w"(b), "w"(c)
8518 : /* No clobbers */);
8519 return result;
8522 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8523 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8525 uint32x4_t result;
8526 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8527 : "=w"(result)
8528 : "0"(a), "w"(b), "w"(c)
8529 : /* No clobbers */);
8530 return result;
/* vmls_lane_<type>(a, b, c, d): multiply-subtract by lane — a - b * c[d]
   on 64-bit vectors.  These are macros (not inline functions) because the
   lane index d must be a compile-time constant to satisfy the "i"
   constraint and be substituted into the instruction's [%4]/[%5] field.
   Arguments are copied into typed locals (c_, b_, a_) inside a statement
   expression to get type checking and single evaluation.
   The f32 form has no fused op here: it emits FMUL into a scratch then
   FSUB, so the result is doubly rounded.
   NOTE(review): as in vmlaq_n_f64, the scratch t1 lacks an "=&w"
   earlyclobber — confirm it cannot alias the inputs.  */
8533 #define vmls_lane_f32(a, b, c, d) \
8534 __extension__ \
8535 ({ \
8536 float32x2_t c_ = (c); \
8537 float32x2_t b_ = (b); \
8538 float32x2_t a_ = (a); \
8539 float32x2_t result; \
8540 float32x2_t t1; \
8541 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
8542 : "=w"(result), "=w"(t1) \
8543 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8544 : /* No clobbers */); \
8545 result; \
/* Integer forms map directly to MLS Vd.<T>, Vn.<T>, Vm.<Ts>[d].  */
8548 #define vmls_lane_s16(a, b, c, d) \
8549 __extension__ \
8550 ({ \
8551 int16x4_t c_ = (c); \
8552 int16x4_t b_ = (b); \
8553 int16x4_t a_ = (a); \
8554 int16x4_t result; \
8555 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
8556 : "=w"(result) \
8557 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8558 : /* No clobbers */); \
8559 result; \
8562 #define vmls_lane_s32(a, b, c, d) \
8563 __extension__ \
8564 ({ \
8565 int32x2_t c_ = (c); \
8566 int32x2_t b_ = (b); \
8567 int32x2_t a_ = (a); \
8568 int32x2_t result; \
8569 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
8570 : "=w"(result) \
8571 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8572 : /* No clobbers */); \
8573 result; \
8576 #define vmls_lane_u16(a, b, c, d) \
8577 __extension__ \
8578 ({ \
8579 uint16x4_t c_ = (c); \
8580 uint16x4_t b_ = (b); \
8581 uint16x4_t a_ = (a); \
8582 uint16x4_t result; \
8583 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
8584 : "=w"(result) \
8585 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8586 : /* No clobbers */); \
8587 result; \
8590 #define vmls_lane_u32(a, b, c, d) \
8591 __extension__ \
8592 ({ \
8593 uint32x2_t c_ = (c); \
8594 uint32x2_t b_ = (b); \
8595 uint32x2_t a_ = (a); \
8596 uint32x2_t result; \
8597 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
8598 : "=w"(result) \
8599 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8600 : /* No clobbers */); \
8601 result; \
/* vmls_n_<type>: multiply-subtract by scalar, a - b * c per lane on
   64-bit vectors.  The scalar lives in a SIMD register ("w") so lane 0
   can be indexed.  The f32 form uses an FMUL + FSUB pair (not fused);
   NOTE(review): scratch t1 again lacks an earlyclobber — verify.  */
8604 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8605 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
8607 float32x2_t result;
8608 float32x2_t t1;
8609 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
8610 : "=w"(result), "=w"(t1)
8611 : "0"(a), "w"(b), "w"(c)
8612 : /* No clobbers */);
8613 return result;
/* Integer forms: MLS Vd.<T>, Vn.<T>, Vm.<Ts>[0].  */
8616 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8617 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
8619 int16x4_t result;
8620 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8621 : "=w"(result)
8622 : "0"(a), "w"(b), "w"(c)
8623 : /* No clobbers */);
8624 return result;
8627 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8628 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
8630 int32x2_t result;
8631 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8632 : "=w"(result)
8633 : "0"(a), "w"(b), "w"(c)
8634 : /* No clobbers */);
8635 return result;
8638 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8639 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
8641 uint16x4_t result;
8642 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8643 : "=w"(result)
8644 : "0"(a), "w"(b), "w"(c)
8645 : /* No clobbers */);
8646 return result;
8649 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8650 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
8652 uint32x2_t result;
8653 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8654 : "=w"(result)
8655 : "0"(a), "w"(b), "w"(c)
8656 : /* No clobbers */);
8657 return result;
/* vmls_<s8|s16|s32|u8|u16|u32>: vector multiply-subtract on 64-bit
   registers, a - b * c element-wise (MLS Vd.<T>, Vn.<T>, Vm.<T>).
   Accumulator tied to the destination via "0"(a).  */
8660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8661 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
8663 int8x8_t result;
8664 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8665 : "=w"(result)
8666 : "0"(a), "w"(b), "w"(c)
8667 : /* No clobbers */);
8668 return result;
8671 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8672 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
8674 int16x4_t result;
8675 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8676 : "=w"(result)
8677 : "0"(a), "w"(b), "w"(c)
8678 : /* No clobbers */);
8679 return result;
8682 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8683 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
8685 int32x2_t result;
8686 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8687 : "=w"(result)
8688 : "0"(a), "w"(b), "w"(c)
8689 : /* No clobbers */);
8690 return result;
/* Unsigned variants — same opcode, MLS is non-widening.  */
8693 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8694 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
8696 uint8x8_t result;
8697 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8698 : "=w"(result)
8699 : "0"(a), "w"(b), "w"(c)
8700 : /* No clobbers */);
8701 return result;
8704 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8705 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
8707 uint16x4_t result;
8708 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8709 : "=w"(result)
8710 : "0"(a), "w"(b), "w"(c)
8711 : /* No clobbers */);
8712 return result;
8715 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8716 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
8718 uint32x2_t result;
8719 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8720 : "=w"(result)
8721 : "0"(a), "w"(b), "w"(c)
8722 : /* No clobbers */);
8723 return result;
/* vmlsl_high_lane[q]_<type>(a, b, c, d): widening multiply-subtract from
   the HIGH half of b — a - widen(b.hi) * c[d] — via SMLSL2/UMLSL2.
   Macros so the lane index d can feed the "i" (immediate) constraint.
   NOTE(review): the _lane_ and _laneq_ variants below are textually
   identical, both declaring c_ as a 128-bit type; per ACLE the plain
   _lane_ form normally takes a 64-bit vector — verify against the spec
   before relying on the _lane_ variants' type checking.  */
8726 #define vmlsl_high_lane_s16(a, b, c, d) \
8727 __extension__ \
8728 ({ \
8729 int16x8_t c_ = (c); \
8730 int16x8_t b_ = (b); \
8731 int32x4_t a_ = (a); \
8732 int32x4_t result; \
8733 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
8734 : "=w"(result) \
8735 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8736 : /* No clobbers */); \
8737 result; \
8740 #define vmlsl_high_lane_s32(a, b, c, d) \
8741 __extension__ \
8742 ({ \
8743 int32x4_t c_ = (c); \
8744 int32x4_t b_ = (b); \
8745 int64x2_t a_ = (a); \
8746 int64x2_t result; \
8747 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
8748 : "=w"(result) \
8749 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8750 : /* No clobbers */); \
8751 result; \
8754 #define vmlsl_high_lane_u16(a, b, c, d) \
8755 __extension__ \
8756 ({ \
8757 uint16x8_t c_ = (c); \
8758 uint16x8_t b_ = (b); \
8759 uint32x4_t a_ = (a); \
8760 uint32x4_t result; \
8761 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
8762 : "=w"(result) \
8763 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8764 : /* No clobbers */); \
8765 result; \
8768 #define vmlsl_high_lane_u32(a, b, c, d) \
8769 __extension__ \
8770 ({ \
8771 uint32x4_t c_ = (c); \
8772 uint32x4_t b_ = (b); \
8773 uint64x2_t a_ = (a); \
8774 uint64x2_t result; \
8775 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
8776 : "=w"(result) \
8777 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8778 : /* No clobbers */); \
8779 result; \
/* _laneq_ variants: c is a full 128-bit vector, same instructions.  */
8782 #define vmlsl_high_laneq_s16(a, b, c, d) \
8783 __extension__ \
8784 ({ \
8785 int16x8_t c_ = (c); \
8786 int16x8_t b_ = (b); \
8787 int32x4_t a_ = (a); \
8788 int32x4_t result; \
8789 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
8790 : "=w"(result) \
8791 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8792 : /* No clobbers */); \
8793 result; \
8796 #define vmlsl_high_laneq_s32(a, b, c, d) \
8797 __extension__ \
8798 ({ \
8799 int32x4_t c_ = (c); \
8800 int32x4_t b_ = (b); \
8801 int64x2_t a_ = (a); \
8802 int64x2_t result; \
8803 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
8804 : "=w"(result) \
8805 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8806 : /* No clobbers */); \
8807 result; \
8810 #define vmlsl_high_laneq_u16(a, b, c, d) \
8811 __extension__ \
8812 ({ \
8813 uint16x8_t c_ = (c); \
8814 uint16x8_t b_ = (b); \
8815 uint32x4_t a_ = (a); \
8816 uint32x4_t result; \
8817 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
8818 : "=w"(result) \
8819 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8820 : /* No clobbers */); \
8821 result; \
8824 #define vmlsl_high_laneq_u32(a, b, c, d) \
8825 __extension__ \
8826 ({ \
8827 uint32x4_t c_ = (c); \
8828 uint32x4_t b_ = (b); \
8829 uint64x2_t a_ = (a); \
8830 uint64x2_t result; \
8831 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
8832 : "=w"(result) \
8833 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8834 : /* No clobbers */); \
8835 result; \
/* vmlsl_high_n_<type>: widening multiply-subtract of the HIGH half of b
   by scalar c — a - widen(b.hi) * c — via SMLSL2/UMLSL2 with lane 0 of
   the scalar placed in a SIMD register.  */
8838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8839 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
8841 int32x4_t result;
8842 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
8843 : "=w"(result)
8844 : "0"(a), "w"(b), "w"(c)
8845 : /* No clobbers */);
8846 return result;
8849 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8850 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
8852 int64x2_t result;
8853 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
8854 : "=w"(result)
8855 : "0"(a), "w"(b), "w"(c)
8856 : /* No clobbers */);
8857 return result;
8860 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8861 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
8863 uint32x4_t result;
8864 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
8865 : "=w"(result)
8866 : "0"(a), "w"(b), "w"(c)
8867 : /* No clobbers */);
8868 return result;
8871 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8872 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
8874 uint64x2_t result;
8875 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
8876 : "=w"(result)
8877 : "0"(a), "w"(b), "w"(c)
8878 : /* No clobbers */);
8879 return result;
/* vmlsl_high_<type>: widening multiply-subtract using the HIGH halves of
   both 128-bit sources — a - widen(b.hi) * widen(c.hi) — SMLSL2/UMLSL2.
   Result elements are twice the width of the source elements.  */
8882 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8883 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
8885 int16x8_t result;
8886 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
8887 : "=w"(result)
8888 : "0"(a), "w"(b), "w"(c)
8889 : /* No clobbers */);
8890 return result;
8893 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8894 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
8896 int32x4_t result;
8897 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
8898 : "=w"(result)
8899 : "0"(a), "w"(b), "w"(c)
8900 : /* No clobbers */);
8901 return result;
8904 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8905 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
8907 int64x2_t result;
8908 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
8909 : "=w"(result)
8910 : "0"(a), "w"(b), "w"(c)
8911 : /* No clobbers */);
8912 return result;
/* Unsigned widening uses UMLSL2 (signedness matters when widening).  */
8915 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8916 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
8918 uint16x8_t result;
8919 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
8920 : "=w"(result)
8921 : "0"(a), "w"(b), "w"(c)
8922 : /* No clobbers */);
8923 return result;
8926 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8927 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
8929 uint32x4_t result;
8930 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
8931 : "=w"(result)
8932 : "0"(a), "w"(b), "w"(c)
8933 : /* No clobbers */);
8934 return result;
8937 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8938 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
8940 uint64x2_t result;
8941 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
8942 : "=w"(result)
8943 : "0"(a), "w"(b), "w"(c)
8944 : /* No clobbers */);
8945 return result;
/* vmlsl_lane[q]_<type>(a, b, c, d): widening multiply-subtract by a
   selected lane — a - widen(b) * c[d] — via SMLSL/UMLSL on the LOW
   64-bit half of b.  Macros so d can satisfy the "i" constraint.
   Here (unlike the _high_ group above) the _lane_ forms take a 64-bit c
   and the _laneq_ forms a 128-bit c, as expected.  */
8948 #define vmlsl_lane_s16(a, b, c, d) \
8949 __extension__ \
8950 ({ \
8951 int16x4_t c_ = (c); \
8952 int16x4_t b_ = (b); \
8953 int32x4_t a_ = (a); \
8954 int32x4_t result; \
8955 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
8956 : "=w"(result) \
8957 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8958 : /* No clobbers */); \
8959 result; \
8962 #define vmlsl_lane_s32(a, b, c, d) \
8963 __extension__ \
8964 ({ \
8965 int32x2_t c_ = (c); \
8966 int32x2_t b_ = (b); \
8967 int64x2_t a_ = (a); \
8968 int64x2_t result; \
8969 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
8970 : "=w"(result) \
8971 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8972 : /* No clobbers */); \
8973 result; \
8976 #define vmlsl_lane_u16(a, b, c, d) \
8977 __extension__ \
8978 ({ \
8979 uint16x4_t c_ = (c); \
8980 uint16x4_t b_ = (b); \
8981 uint32x4_t a_ = (a); \
8982 uint32x4_t result; \
8983 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
8984 : "=w"(result) \
8985 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8986 : /* No clobbers */); \
8987 result; \
8990 #define vmlsl_lane_u32(a, b, c, d) \
8991 __extension__ \
8992 ({ \
8993 uint32x2_t c_ = (c); \
8994 uint32x2_t b_ = (b); \
8995 uint64x2_t a_ = (a); \
8996 uint64x2_t result; \
8997 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
8998 : "=w"(result) \
8999 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9000 : /* No clobbers */); \
9001 result; \
/* _laneq_: lane taken from a 128-bit c, widening the index range.  */
9004 #define vmlsl_laneq_s16(a, b, c, d) \
9005 __extension__ \
9006 ({ \
9007 int16x8_t c_ = (c); \
9008 int16x4_t b_ = (b); \
9009 int32x4_t a_ = (a); \
9010 int32x4_t result; \
9011 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
9012 : "=w"(result) \
9013 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9014 : /* No clobbers */); \
9015 result; \
9018 #define vmlsl_laneq_s32(a, b, c, d) \
9019 __extension__ \
9020 ({ \
9021 int32x4_t c_ = (c); \
9022 int32x2_t b_ = (b); \
9023 int64x2_t a_ = (a); \
9024 int64x2_t result; \
9025 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
9026 : "=w"(result) \
9027 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9028 : /* No clobbers */); \
9029 result; \
9032 #define vmlsl_laneq_u16(a, b, c, d) \
9033 __extension__ \
9034 ({ \
9035 uint16x8_t c_ = (c); \
9036 uint16x4_t b_ = (b); \
9037 uint32x4_t a_ = (a); \
9038 uint32x4_t result; \
9039 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
9040 : "=w"(result) \
9041 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9042 : /* No clobbers */); \
9043 result; \
9046 #define vmlsl_laneq_u32(a, b, c, d) \
9047 __extension__ \
9048 ({ \
9049 uint32x4_t c_ = (c); \
9050 uint32x2_t b_ = (b); \
9051 uint64x2_t a_ = (a); \
9052 uint64x2_t result; \
9053 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
9054 : "=w"(result) \
9055 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9056 : /* No clobbers */); \
9057 result; \
/* vmlsl_n_<type>: widening multiply-subtract by scalar — a - widen(b) * c
   — via SMLSL/UMLSL with the scalar in SIMD lane 0.  */
9060 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9061 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
9063 int32x4_t result;
9064 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
9065 : "=w"(result)
9066 : "0"(a), "w"(b), "w"(c)
9067 : /* No clobbers */);
9068 return result;
9071 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9072 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
9074 int64x2_t result;
9075 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
9076 : "=w"(result)
9077 : "0"(a), "w"(b), "w"(c)
9078 : /* No clobbers */);
9079 return result;
9082 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9083 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
9085 uint32x4_t result;
9086 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
9087 : "=w"(result)
9088 : "0"(a), "w"(b), "w"(c)
9089 : /* No clobbers */);
9090 return result;
9093 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9094 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
9096 uint64x2_t result;
9097 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
9098 : "=w"(result)
9099 : "0"(a), "w"(b), "w"(c)
9100 : /* No clobbers */);
9101 return result;
/* vmlsl_<type>: widening vector multiply-subtract — a - widen(b) * widen(c)
   — SMLSL/UMLSL over 64-bit sources into a 128-bit accumulator.  */
9104 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9105 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
9107 int16x8_t result;
9108 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
9109 : "=w"(result)
9110 : "0"(a), "w"(b), "w"(c)
9111 : /* No clobbers */);
9112 return result;
9115 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9116 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
9118 int32x4_t result;
9119 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
9120 : "=w"(result)
9121 : "0"(a), "w"(b), "w"(c)
9122 : /* No clobbers */);
9123 return result;
9126 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9127 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
9129 int64x2_t result;
9130 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
9131 : "=w"(result)
9132 : "0"(a), "w"(b), "w"(c)
9133 : /* No clobbers */);
9134 return result;
9137 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9138 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
9140 uint16x8_t result;
9141 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
9142 : "=w"(result)
9143 : "0"(a), "w"(b), "w"(c)
9144 : /* No clobbers */);
9145 return result;
9148 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9149 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
9151 uint32x4_t result;
9152 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
9153 : "=w"(result)
9154 : "0"(a), "w"(b), "w"(c)
9155 : /* No clobbers */);
9156 return result;
9159 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9160 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
9162 uint64x2_t result;
9163 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
9164 : "=w"(result)
9165 : "0"(a), "w"(b), "w"(c)
9166 : /* No clobbers */);
9167 return result;
/* vmlsq_lane_<type>(a, b, c, d): 128-bit multiply-subtract by lane,
   a - b * c[d].  Macros for the "i" lane-index constraint.
   NOTE(review): vmlsq_lane_f32 declares c_ as float32x4_t; per ACLE a
   _lane_ (not _laneq_) form normally takes a 64-bit c — confirm.
   The f32 form emits FMUL + FSUB (unfused, scratch t1 without
   earlyclobber — see earlier notes).  */
9170 #define vmlsq_lane_f32(a, b, c, d) \
9171 __extension__ \
9172 ({ \
9173 float32x4_t c_ = (c); \
9174 float32x4_t b_ = (b); \
9175 float32x4_t a_ = (a); \
9176 float32x4_t result; \
9177 float32x4_t t1; \
9178 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
9179 : "=w"(result), "=w"(t1) \
9180 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9181 : /* No clobbers */); \
9182 result; \
9185 #define vmlsq_lane_s16(a, b, c, d) \
9186 __extension__ \
9187 ({ \
9188 int16x8_t c_ = (c); \
9189 int16x8_t b_ = (b); \
9190 int16x8_t a_ = (a); \
9191 int16x8_t result; \
9192 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
9193 : "=w"(result) \
9194 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9195 : /* No clobbers */); \
9196 result; \
9199 #define vmlsq_lane_s32(a, b, c, d) \
9200 __extension__ \
9201 ({ \
9202 int32x4_t c_ = (c); \
9203 int32x4_t b_ = (b); \
9204 int32x4_t a_ = (a); \
9205 int32x4_t result; \
9206 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
9207 : "=w"(result) \
9208 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9209 : /* No clobbers */); \
9210 result; \
9213 #define vmlsq_lane_u16(a, b, c, d) \
9214 __extension__ \
9215 ({ \
9216 uint16x8_t c_ = (c); \
9217 uint16x8_t b_ = (b); \
9218 uint16x8_t a_ = (a); \
9219 uint16x8_t result; \
9220 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
9221 : "=w"(result) \
9222 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9223 : /* No clobbers */); \
9224 result; \
9227 #define vmlsq_lane_u32(a, b, c, d) \
9228 __extension__ \
9229 ({ \
9230 uint32x4_t c_ = (c); \
9231 uint32x4_t b_ = (b); \
9232 uint32x4_t a_ = (a); \
9233 uint32x4_t result; \
9234 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
9235 : "=w"(result) \
9236 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9237 : /* No clobbers */); \
9238 result; \
/* vmlsq_laneq_<type>(a, b, c, d): 128-bit multiply-subtract by a lane of
   a 128-bit c.  This group uses double-underscore local names (__a_ etc.)
   to avoid capturing user identifiers passed as macro arguments.  The f32
   form again expands to an unfused FMUL + FSUB pair.  */
9241 #define vmlsq_laneq_f32(__a, __b, __c, __d) \
9242 __extension__ \
9243 ({ \
9244 float32x4_t __c_ = (__c); \
9245 float32x4_t __b_ = (__b); \
9246 float32x4_t __a_ = (__a); \
9247 float32x4_t __result; \
9248 float32x4_t __t1; \
9249 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
9250 : "=w"(__result), "=w"(__t1) \
9251 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9252 : /* No clobbers */); \
9253 __result; \
9256 #define vmlsq_laneq_s16(__a, __b, __c, __d) \
9257 __extension__ \
9258 ({ \
9259 int16x8_t __c_ = (__c); \
9260 int16x8_t __b_ = (__b); \
9261 int16x8_t __a_ = (__a); \
9262 int16x8_t __result; \
9263 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
9264 : "=w"(__result) \
9265 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9266 : /* No clobbers */); \
9267 __result; \
9270 #define vmlsq_laneq_s32(__a, __b, __c, __d) \
9271 __extension__ \
9272 ({ \
9273 int32x4_t __c_ = (__c); \
9274 int32x4_t __b_ = (__b); \
9275 int32x4_t __a_ = (__a); \
9276 int32x4_t __result; \
9277 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
9278 : "=w"(__result) \
9279 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9280 : /* No clobbers */); \
9281 __result; \
9284 #define vmlsq_laneq_u16(__a, __b, __c, __d) \
9285 __extension__ \
9286 ({ \
9287 uint16x8_t __c_ = (__c); \
9288 uint16x8_t __b_ = (__b); \
9289 uint16x8_t __a_ = (__a); \
9290 uint16x8_t __result; \
9291 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
9292 : "=w"(__result) \
9293 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9294 : /* No clobbers */); \
9295 __result; \
9298 #define vmlsq_laneq_u32(__a, __b, __c, __d) \
9299 __extension__ \
9300 ({ \
9301 uint32x4_t __c_ = (__c); \
9302 uint32x4_t __b_ = (__b); \
9303 uint32x4_t __a_ = (__a); \
9304 uint32x4_t __result; \
9305 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
9306 : "=w"(__result) \
9307 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9308 : /* No clobbers */); \
9309 __result; \
/* vmlsq_n_<f32|f64>: 128-bit FP multiply-subtract by scalar, a - b * c.
   Expands to FMUL into scratch t1 then FSUB — unfused (double rounding).
   NOTE(review): t1 has no "=&w" earlyclobber; see vmlaq_n_f64 note.  */
9312 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9313 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
9315 float32x4_t result;
9316 float32x4_t t1;
9317 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
9318 : "=w"(result), "=w"(t1)
9319 : "0"(a), "w"(b), "w"(c)
9320 : /* No clobbers */);
9321 return result;
9324 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9325 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
9327 float64x2_t result;
9328 float64x2_t t1;
9329 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
9330 : "=w"(result), "=w"(t1)
9331 : "0"(a), "w"(b), "w"(c)
9332 : /* No clobbers */);
9333 return result;
/* vmlsq_n_<s16|s32|u16|u32>: 128-bit integer multiply-subtract by scalar,
   a - b * c per lane, MLS with the scalar in SIMD lane 0.  */
9336 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9337 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
9339 int16x8_t result;
9340 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9341 : "=w"(result)
9342 : "0"(a), "w"(b), "w"(c)
9343 : /* No clobbers */);
9344 return result;
9347 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9348 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
9350 int32x4_t result;
9351 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9352 : "=w"(result)
9353 : "0"(a), "w"(b), "w"(c)
9354 : /* No clobbers */);
9355 return result;
9358 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9359 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
9361 uint16x8_t result;
9362 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9363 : "=w"(result)
9364 : "0"(a), "w"(b), "w"(c)
9365 : /* No clobbers */);
9366 return result;
9369 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9370 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
9372 uint32x4_t result;
9373 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9374 : "=w"(result)
9375 : "0"(a), "w"(b), "w"(c)
9376 : /* No clobbers */);
9377 return result;
/* vmlsq_<s8|s16|s32|u8|u16|u32>: 128-bit vector multiply-subtract,
   a - b * c element-wise (MLS Vd.<T>, Vn.<T>, Vm.<T>).  */
9380 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9381 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
9383 int8x16_t result;
9384 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9385 : "=w"(result)
9386 : "0"(a), "w"(b), "w"(c)
9387 : /* No clobbers */);
9388 return result;
9391 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9392 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
9394 int16x8_t result;
9395 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9396 : "=w"(result)
9397 : "0"(a), "w"(b), "w"(c)
9398 : /* No clobbers */);
9399 return result;
9402 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9403 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
9405 int32x4_t result;
9406 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9407 : "=w"(result)
9408 : "0"(a), "w"(b), "w"(c)
9409 : /* No clobbers */);
9410 return result;
9413 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9414 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
9416 uint8x16_t result;
9417 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9418 : "=w"(result)
9419 : "0"(a), "w"(b), "w"(c)
9420 : /* No clobbers */);
9421 return result;
9424 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9425 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
9427 uint16x8_t result;
9428 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9429 : "=w"(result)
9430 : "0"(a), "w"(b), "w"(c)
9431 : /* No clobbers */);
9432 return result;
9435 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9436 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
9438 uint32x4_t result;
9439 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9440 : "=w"(result)
9441 : "0"(a), "w"(b), "w"(c)
9442 : /* No clobbers */);
9443 return result;
/* vmov_n_<type>: broadcast a scalar (general register, "r") to every lane
   of a 64-bit vector via DUP Vd.<T>, Wn.  The sub-word variants take a
   32-bit scalar argument and use the low bits.  */
9446 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9447 vmov_n_f32 (float32_t a)
9449 float32x2_t result;
9450 __asm__ ("dup %0.2s, %w1"
9451 : "=w"(result)
9452 : "r"(a)
9453 : /* No clobbers */);
9454 return result;
9457 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9458 vmov_n_p8 (uint32_t a)
9460 poly8x8_t result;
9461 __asm__ ("dup %0.8b,%w1"
9462 : "=w"(result)
9463 : "r"(a)
9464 : /* No clobbers */);
9465 return result;
9468 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9469 vmov_n_p16 (uint32_t a)
9471 poly16x4_t result;
9472 __asm__ ("dup %0.4h,%w1"
9473 : "=w"(result)
9474 : "r"(a)
9475 : /* No clobbers */);
9476 return result;
9479 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9480 vmov_n_s8 (int32_t a)
9482 int8x8_t result;
9483 __asm__ ("dup %0.8b,%w1"
9484 : "=w"(result)
9485 : "r"(a)
9486 : /* No clobbers */);
9487 return result;
9490 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9491 vmov_n_s16 (int32_t a)
9493 int16x4_t result;
9494 __asm__ ("dup %0.4h,%w1"
9495 : "=w"(result)
9496 : "r"(a)
9497 : /* No clobbers */);
9498 return result;
9501 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9502 vmov_n_s32 (int32_t a)
9504 int32x2_t result;
9505 __asm__ ("dup %0.2s,%w1"
9506 : "=w"(result)
9507 : "r"(a)
9508 : /* No clobbers */);
9509 return result;
/* 64-bit variants: int64x1_t/uint64x1_t are plain 64-bit scalars (see
   typedefs at top of file), so the value is inserted into lane 0 with
   INS using the full X register (%x1) rather than broadcast.
   NOTE(review): INS writes only lane 0 while %0 is a write-only "=w"
   operand; for a 64-bit result only lane 0 is observable, but confirm no
   consumer reads the undefined upper bits of the V register.  */
9512 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9513 vmov_n_s64 (int64_t a)
9515 int64x1_t result;
9516 __asm__ ("ins %0.d[0],%x1"
9517 : "=w"(result)
9518 : "r"(a)
9519 : /* No clobbers */);
9520 return result;
9523 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9524 vmov_n_u8 (uint32_t a)
9526 uint8x8_t result;
9527 __asm__ ("dup %0.8b,%w1"
9528 : "=w"(result)
9529 : "r"(a)
9530 : /* No clobbers */);
9531 return result;
9534 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9535 vmov_n_u16 (uint32_t a)
9537 uint16x4_t result;
9538 __asm__ ("dup %0.4h,%w1"
9539 : "=w"(result)
9540 : "r"(a)
9541 : /* No clobbers */);
9542 return result;
9545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9546 vmov_n_u32 (uint32_t a)
9548 uint32x2_t result;
9549 __asm__ ("dup %0.2s,%w1"
9550 : "=w"(result)
9551 : "r"(a)
9552 : /* No clobbers */);
9553 return result;
9556 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9557 vmov_n_u64 (uint64_t a)
9559 uint64x1_t result;
9560 __asm__ ("ins %0.d[0],%x1"
9561 : "=w"(result)
9562 : "r"(a)
9563 : /* No clobbers */);
9564 return result;
/* vmovl_high_<type>: widen the HIGH half of a 128-bit vector to double
   element width, using a shift-left-long by #0 (SSHLL2 sign-extends,
   USHLL2 zero-extends).  */
9567 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9568 vmovl_high_s8 (int8x16_t a)
9570 int16x8_t result;
9571 __asm__ ("sshll2 %0.8h,%1.16b,#0"
9572 : "=w"(result)
9573 : "w"(a)
9574 : /* No clobbers */);
9575 return result;
9578 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9579 vmovl_high_s16 (int16x8_t a)
9581 int32x4_t result;
9582 __asm__ ("sshll2 %0.4s,%1.8h,#0"
9583 : "=w"(result)
9584 : "w"(a)
9585 : /* No clobbers */);
9586 return result;
9589 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9590 vmovl_high_s32 (int32x4_t a)
9592 int64x2_t result;
9593 __asm__ ("sshll2 %0.2d,%1.4s,#0"
9594 : "=w"(result)
9595 : "w"(a)
9596 : /* No clobbers */);
9597 return result;
9600 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9601 vmovl_high_u8 (uint8x16_t a)
9603 uint16x8_t result;
9604 __asm__ ("ushll2 %0.8h,%1.16b,#0"
9605 : "=w"(result)
9606 : "w"(a)
9607 : /* No clobbers */);
9608 return result;
9611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9612 vmovl_high_u16 (uint16x8_t a)
9614 uint32x4_t result;
9615 __asm__ ("ushll2 %0.4s,%1.8h,#0"
9616 : "=w"(result)
9617 : "w"(a)
9618 : /* No clobbers */);
9619 return result;
9622 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9623 vmovl_high_u32 (uint32x4_t a)
9625 uint64x2_t result;
9626 __asm__ ("ushll2 %0.2d,%1.4s,#0"
9627 : "=w"(result)
9628 : "w"(a)
9629 : /* No clobbers */);
9630 return result;
/* vmovl_<type>: widen a 64-bit vector to double element width via
   shift-left-long by #0 (SSHLL sign-extends, USHLL zero-extends).  */
9633 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9634 vmovl_s8 (int8x8_t a)
9636 int16x8_t result;
9637 __asm__ ("sshll %0.8h,%1.8b,#0"
9638 : "=w"(result)
9639 : "w"(a)
9640 : /* No clobbers */);
9641 return result;
9644 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9645 vmovl_s16 (int16x4_t a)
9647 int32x4_t result;
9648 __asm__ ("sshll %0.4s,%1.4h,#0"
9649 : "=w"(result)
9650 : "w"(a)
9651 : /* No clobbers */);
9652 return result;
9655 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9656 vmovl_s32 (int32x2_t a)
9658 int64x2_t result;
9659 __asm__ ("sshll %0.2d,%1.2s,#0"
9660 : "=w"(result)
9661 : "w"(a)
9662 : /* No clobbers */);
9663 return result;
9666 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9667 vmovl_u8 (uint8x8_t a)
9669 uint16x8_t result;
9670 __asm__ ("ushll %0.8h,%1.8b,#0"
9671 : "=w"(result)
9672 : "w"(a)
9673 : /* No clobbers */);
9674 return result;
9677 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9678 vmovl_u16 (uint16x4_t a)
9680 uint32x4_t result;
9681 __asm__ ("ushll %0.4s,%1.4h,#0"
9682 : "=w"(result)
9683 : "w"(a)
9684 : /* No clobbers */);
9685 return result;
9688 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9689 vmovl_u32 (uint32x2_t a)
9691 uint64x2_t result;
9692 __asm__ ("ushll %0.2d,%1.2s,#0"
9693 : "=w"(result)
9694 : "w"(a)
9695 : /* No clobbers */);
9696 return result;
/* vmovn_high_<type>: narrow b to half element width and place it in the
   HIGH half of the result, keeping a as the low half.  Implemented by
   first building {a, 0} with vcombine_*/vcreate_*, then letting XTN2
   overwrite the upper half; "+w" marks result read-write so the low half
   survives the asm.  */
9699 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9700 vmovn_high_s16 (int8x8_t a, int16x8_t b)
9702 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
9703 __asm__ ("xtn2 %0.16b,%1.8h"
9704 : "+w"(result)
9705 : "w"(b)
9706 : /* No clobbers */);
9707 return result;
9710 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9711 vmovn_high_s32 (int16x4_t a, int32x4_t b)
9713 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
9714 __asm__ ("xtn2 %0.8h,%1.4s"
9715 : "+w"(result)
9716 : "w"(b)
9717 : /* No clobbers */);
9718 return result;
9721 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9722 vmovn_high_s64 (int32x2_t a, int64x2_t b)
9724 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
9725 __asm__ ("xtn2 %0.4s,%1.2d"
9726 : "+w"(result)
9727 : "w"(b)
9728 : /* No clobbers */);
9729 return result;
9732 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9733 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
9735 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
9736 __asm__ ("xtn2 %0.16b,%1.8h"
9737 : "+w"(result)
9738 : "w"(b)
9739 : /* No clobbers */);
9740 return result;
9743 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9744 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
9746 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
9747 __asm__ ("xtn2 %0.8h,%1.4s"
9748 : "+w"(result)
9749 : "w"(b)
9750 : /* No clobbers */);
9751 return result;
9754 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9755 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
9757 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
9758 __asm__ ("xtn2 %0.4s,%1.2d"
9759 : "+w"(result)
9760 : "w"(b)
9761 : /* No clobbers */);
9762 return result;
9765 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9766 vmovn_s16 (int16x8_t a)
9768 int8x8_t result;
9769 __asm__ ("xtn %0.8b,%1.8h"
9770 : "=w"(result)
9771 : "w"(a)
9772 : /* No clobbers */);
9773 return result;
9776 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9777 vmovn_s32 (int32x4_t a)
9779 int16x4_t result;
9780 __asm__ ("xtn %0.4h,%1.4s"
9781 : "=w"(result)
9782 : "w"(a)
9783 : /* No clobbers */);
9784 return result;
9787 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9788 vmovn_s64 (int64x2_t a)
9790 int32x2_t result;
9791 __asm__ ("xtn %0.2s,%1.2d"
9792 : "=w"(result)
9793 : "w"(a)
9794 : /* No clobbers */);
9795 return result;
9798 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9799 vmovn_u16 (uint16x8_t a)
9801 uint8x8_t result;
9802 __asm__ ("xtn %0.8b,%1.8h"
9803 : "=w"(result)
9804 : "w"(a)
9805 : /* No clobbers */);
9806 return result;
9809 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9810 vmovn_u32 (uint32x4_t a)
9812 uint16x4_t result;
9813 __asm__ ("xtn %0.4h,%1.4s"
9814 : "=w"(result)
9815 : "w"(a)
9816 : /* No clobbers */);
9817 return result;
9820 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9821 vmovn_u64 (uint64x2_t a)
9823 uint32x2_t result;
9824 __asm__ ("xtn %0.2s,%1.2d"
9825 : "=w"(result)
9826 : "w"(a)
9827 : /* No clobbers */);
9828 return result;
9831 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9832 vmovq_n_f32 (float32_t a)
9834 float32x4_t result;
9835 __asm__ ("dup %0.4s, %w1"
9836 : "=w"(result)
9837 : "r"(a)
9838 : /* No clobbers */);
9839 return result;
9842 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9843 vmovq_n_f64 (float64_t a)
9845 return (float64x2_t) {a, a};
9848 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9849 vmovq_n_p8 (uint32_t a)
9851 poly8x16_t result;
9852 __asm__ ("dup %0.16b,%w1"
9853 : "=w"(result)
9854 : "r"(a)
9855 : /* No clobbers */);
9856 return result;
9859 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9860 vmovq_n_p16 (uint32_t a)
9862 poly16x8_t result;
9863 __asm__ ("dup %0.8h,%w1"
9864 : "=w"(result)
9865 : "r"(a)
9866 : /* No clobbers */);
9867 return result;
9870 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9871 vmovq_n_s8 (int32_t a)
9873 int8x16_t result;
9874 __asm__ ("dup %0.16b,%w1"
9875 : "=w"(result)
9876 : "r"(a)
9877 : /* No clobbers */);
9878 return result;
9881 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9882 vmovq_n_s16 (int32_t a)
9884 int16x8_t result;
9885 __asm__ ("dup %0.8h,%w1"
9886 : "=w"(result)
9887 : "r"(a)
9888 : /* No clobbers */);
9889 return result;
9892 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9893 vmovq_n_s32 (int32_t a)
9895 int32x4_t result;
9896 __asm__ ("dup %0.4s,%w1"
9897 : "=w"(result)
9898 : "r"(a)
9899 : /* No clobbers */);
9900 return result;
9903 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9904 vmovq_n_s64 (int64_t a)
9906 int64x2_t result;
9907 __asm__ ("dup %0.2d,%x1"
9908 : "=w"(result)
9909 : "r"(a)
9910 : /* No clobbers */);
9911 return result;
9914 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9915 vmovq_n_u8 (uint32_t a)
9917 uint8x16_t result;
9918 __asm__ ("dup %0.16b,%w1"
9919 : "=w"(result)
9920 : "r"(a)
9921 : /* No clobbers */);
9922 return result;
9925 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9926 vmovq_n_u16 (uint32_t a)
9928 uint16x8_t result;
9929 __asm__ ("dup %0.8h,%w1"
9930 : "=w"(result)
9931 : "r"(a)
9932 : /* No clobbers */);
9933 return result;
9936 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9937 vmovq_n_u32 (uint32_t a)
9939 uint32x4_t result;
9940 __asm__ ("dup %0.4s,%w1"
9941 : "=w"(result)
9942 : "r"(a)
9943 : /* No clobbers */);
9944 return result;
9947 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9948 vmovq_n_u64 (uint64_t a)
9950 uint64x2_t result;
9951 __asm__ ("dup %0.2d,%x1"
9952 : "=w"(result)
9953 : "r"(a)
9954 : /* No clobbers */);
9955 return result;
/* Multiply by a selected lane of a 64-bit vector.  These must be macros
   because the lane number C has to be an immediate ("i" constraint).
   B is captured before A to keep the original argument-evaluation
   order.  */
#define vmul_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t ret_; \
       __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t ret_; \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t ret_; \
       __asm__ ("mul %0.2s,%1.2s,%2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t ret_; \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t ret_; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
/* Multiply by a selected lane of a 128-bit ("q") vector; lane number C
   must be an immediate, hence the macro form.  */
#define vmul_laneq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x2_t a_ = (a); \
       float32x2_t ret_; \
       __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x4_t ret_; \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x2_t ret_; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x4_t ret_; \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmul_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x2_t ret_; \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
10088 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10089 vmul_n_f32 (float32x2_t a, float32_t b)
10091 float32x2_t result;
10092 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
10093 : "=w"(result)
10094 : "w"(a), "w"(b)
10095 : /* No clobbers */);
10096 return result;
10099 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10100 vmul_n_s16 (int16x4_t a, int16_t b)
10102 int16x4_t result;
10103 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10104 : "=w"(result)
10105 : "w"(a), "w"(b)
10106 : /* No clobbers */);
10107 return result;
10110 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10111 vmul_n_s32 (int32x2_t a, int32_t b)
10113 int32x2_t result;
10114 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10115 : "=w"(result)
10116 : "w"(a), "w"(b)
10117 : /* No clobbers */);
10118 return result;
10121 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10122 vmul_n_u16 (uint16x4_t a, uint16_t b)
10124 uint16x4_t result;
10125 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10126 : "=w"(result)
10127 : "w"(a), "w"(b)
10128 : /* No clobbers */);
10129 return result;
10132 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10133 vmul_n_u32 (uint32x2_t a, uint32_t b)
10135 uint32x2_t result;
10136 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10137 : "=w"(result)
10138 : "w"(a), "w"(b)
10139 : /* No clobbers */);
10140 return result;
/* Scalar double multiplied by lane C of B.
   NOTE(review): B is captured as float64x2_t, but ACLE declares
   vmuld_lane_f64 with a float64x1_t lane vector (the 128-bit form is
   the laneq variant) — confirm against the ACLE spec before relying on
   this signature.  Behavior is preserved here.  */
#define vmuld_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64_t ret_; \
       __asm__ ("fmul %d0,%d1,%2.d[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
/* Widening multiply of the high half of A by lane C of B.
   Fix: ACLE declares the non-q lane variants with a 64-bit lane vector
   for B (int16x4_t / int32x2_t / uint16x4_t / uint32x2_t); the previous
   definitions used the 128-bit types, making them identical to the
   separate vmull_high_laneq_* macros below and rejecting well-formed
   ACLE calls.  The asm templates are unchanged — lane indexing by
   %2.h[imm] / %2.s[imm] is valid on the low half of the register for
   the lane ranges these variants accept.  */
#define vmull_high_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x8_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vmull_high_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x4_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vmull_high_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vmull_high_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w"(result) \
                : "w"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* Widening multiply of the high half of A by lane C of the 128-bit
   ("q") vector B; C must be an immediate, hence the macro form.  */
#define vmull_high_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int32x4_t ret_; \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_high_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int64x2_t ret_; \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_high_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint32x4_t ret_; \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_high_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint64x2_t ret_; \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
10260 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10261 vmull_high_n_s16 (int16x8_t a, int16_t b)
10263 int32x4_t result;
10264 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
10265 : "=w"(result)
10266 : "w"(a), "w"(b)
10267 : /* No clobbers */);
10268 return result;
10271 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10272 vmull_high_n_s32 (int32x4_t a, int32_t b)
10274 int64x2_t result;
10275 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
10276 : "=w"(result)
10277 : "w"(a), "w"(b)
10278 : /* No clobbers */);
10279 return result;
10282 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10283 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
10285 uint32x4_t result;
10286 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
10287 : "=w"(result)
10288 : "w"(a), "w"(b)
10289 : /* No clobbers */);
10290 return result;
10293 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10294 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
10296 uint64x2_t result;
10297 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
10298 : "=w"(result)
10299 : "w"(a), "w"(b)
10300 : /* No clobbers */);
10301 return result;
10304 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10305 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
10307 poly16x8_t result;
10308 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
10309 : "=w"(result)
10310 : "w"(a), "w"(b)
10311 : /* No clobbers */);
10312 return result;
10315 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10316 vmull_high_s8 (int8x16_t a, int8x16_t b)
10318 int16x8_t result;
10319 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
10320 : "=w"(result)
10321 : "w"(a), "w"(b)
10322 : /* No clobbers */);
10323 return result;
10326 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10327 vmull_high_s16 (int16x8_t a, int16x8_t b)
10329 int32x4_t result;
10330 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
10331 : "=w"(result)
10332 : "w"(a), "w"(b)
10333 : /* No clobbers */);
10334 return result;
10337 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10338 vmull_high_s32 (int32x4_t a, int32x4_t b)
10340 int64x2_t result;
10341 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
10342 : "=w"(result)
10343 : "w"(a), "w"(b)
10344 : /* No clobbers */);
10345 return result;
10348 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10349 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
10351 uint16x8_t result;
10352 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
10353 : "=w"(result)
10354 : "w"(a), "w"(b)
10355 : /* No clobbers */);
10356 return result;
10359 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10360 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
10362 uint32x4_t result;
10363 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
10364 : "=w"(result)
10365 : "w"(a), "w"(b)
10366 : /* No clobbers */);
10367 return result;
10370 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10371 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
10373 uint64x2_t result;
10374 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
10375 : "=w"(result)
10376 : "w"(a), "w"(b)
10377 : /* No clobbers */);
10378 return result;
/* Widening multiply of A by lane C of the 64-bit vector B; C must be an
   immediate, hence the macro form.  */
#define vmull_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int32x4_t ret_; \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int64x2_t ret_; \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint32x4_t ret_; \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint64x2_t ret_; \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
/* Widening multiply of A by lane C of the 128-bit ("q") vector B; C
   must be an immediate, hence the macro form.  */
#define vmull_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x4_t a_ = (a); \
       int32x4_t ret_; \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x2_t a_ = (a); \
       int64x2_t ret_; \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint32x4_t ret_; \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmull_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint64x2_t ret_; \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
10485 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10486 vmull_n_s16 (int16x4_t a, int16_t b)
10488 int32x4_t result;
10489 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
10490 : "=w"(result)
10491 : "w"(a), "w"(b)
10492 : /* No clobbers */);
10493 return result;
10496 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10497 vmull_n_s32 (int32x2_t a, int32_t b)
10499 int64x2_t result;
10500 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
10501 : "=w"(result)
10502 : "w"(a), "w"(b)
10503 : /* No clobbers */);
10504 return result;
10507 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10508 vmull_n_u16 (uint16x4_t a, uint16_t b)
10510 uint32x4_t result;
10511 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
10512 : "=w"(result)
10513 : "w"(a), "w"(b)
10514 : /* No clobbers */);
10515 return result;
10518 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10519 vmull_n_u32 (uint32x2_t a, uint32_t b)
10521 uint64x2_t result;
10522 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
10523 : "=w"(result)
10524 : "w"(a), "w"(b)
10525 : /* No clobbers */);
10526 return result;
10529 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10530 vmull_p8 (poly8x8_t a, poly8x8_t b)
10532 poly16x8_t result;
10533 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
10534 : "=w"(result)
10535 : "w"(a), "w"(b)
10536 : /* No clobbers */);
10537 return result;
10540 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10541 vmull_s8 (int8x8_t a, int8x8_t b)
10543 int16x8_t result;
10544 __asm__ ("smull %0.8h, %1.8b, %2.8b"
10545 : "=w"(result)
10546 : "w"(a), "w"(b)
10547 : /* No clobbers */);
10548 return result;
10551 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10552 vmull_s16 (int16x4_t a, int16x4_t b)
10554 int32x4_t result;
10555 __asm__ ("smull %0.4s, %1.4h, %2.4h"
10556 : "=w"(result)
10557 : "w"(a), "w"(b)
10558 : /* No clobbers */);
10559 return result;
10562 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10563 vmull_s32 (int32x2_t a, int32x2_t b)
10565 int64x2_t result;
10566 __asm__ ("smull %0.2d, %1.2s, %2.2s"
10567 : "=w"(result)
10568 : "w"(a), "w"(b)
10569 : /* No clobbers */);
10570 return result;
10573 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10574 vmull_u8 (uint8x8_t a, uint8x8_t b)
10576 uint16x8_t result;
10577 __asm__ ("umull %0.8h, %1.8b, %2.8b"
10578 : "=w"(result)
10579 : "w"(a), "w"(b)
10580 : /* No clobbers */);
10581 return result;
10584 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10585 vmull_u16 (uint16x4_t a, uint16x4_t b)
10587 uint32x4_t result;
10588 __asm__ ("umull %0.4s, %1.4h, %2.4h"
10589 : "=w"(result)
10590 : "w"(a), "w"(b)
10591 : /* No clobbers */);
10592 return result;
10595 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10596 vmull_u32 (uint32x2_t a, uint32x2_t b)
10598 uint64x2_t result;
10599 __asm__ ("umull %0.2d, %1.2s, %2.2s"
10600 : "=w"(result)
10601 : "w"(a), "w"(b)
10602 : /* No clobbers */);
10603 return result;
/* 128-bit multiply by a selected lane of a 64-bit vector; lane number C
   must be an immediate, hence the macro form.  */
#define vmulq_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t ret_; \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t ret_; \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t ret_; \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t ret_; \
       __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t ret_; \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t ret_; \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
/* 128-bit multiply by a selected lane of a 128-bit vector; lane number
   C must be an immediate, hence the macro form.  */
#define vmulq_laneq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32x4_t a_ = (a); \
       float32x4_t ret_; \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_laneq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64x2_t a_ = (a); \
       float64x2_t ret_; \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_laneq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16x8_t a_ = (a); \
       int16x8_t ret_; \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_laneq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32x4_t a_ = (a); \
       int32x4_t ret_; \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_laneq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16x8_t a_ = (a); \
       uint16x8_t ret_; \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })

#define vmulq_laneq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32x4_t a_ = (a); \
       uint32x4_t ret_; \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
10762 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10763 vmulq_n_f32 (float32x4_t a, float32_t b)
10765 float32x4_t result;
10766 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
10767 : "=w"(result)
10768 : "w"(a), "w"(b)
10769 : /* No clobbers */);
10770 return result;
10773 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10774 vmulq_n_f64 (float64x2_t a, float64_t b)
10776 float64x2_t result;
10777 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
10778 : "=w"(result)
10779 : "w"(a), "w"(b)
10780 : /* No clobbers */);
10781 return result;
10784 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10785 vmulq_n_s16 (int16x8_t a, int16_t b)
10787 int16x8_t result;
10788 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10789 : "=w"(result)
10790 : "w"(a), "w"(b)
10791 : /* No clobbers */);
10792 return result;
10795 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10796 vmulq_n_s32 (int32x4_t a, int32_t b)
10798 int32x4_t result;
10799 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10800 : "=w"(result)
10801 : "w"(a), "w"(b)
10802 : /* No clobbers */);
10803 return result;
10806 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10807 vmulq_n_u16 (uint16x8_t a, uint16_t b)
10809 uint16x8_t result;
10810 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10811 : "=w"(result)
10812 : "w"(a), "w"(b)
10813 : /* No clobbers */);
10814 return result;
10817 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10818 vmulq_n_u32 (uint32x4_t a, uint32_t b)
10820 uint32x4_t result;
10821 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10822 : "=w"(result)
10823 : "w"(a), "w"(b)
10824 : /* No clobbers */);
10825 return result;
/* Scalar float multiplied by lane C of B.
   NOTE(review): B is captured as float32x4_t, but ACLE declares
   vmuls_lane_f32 with a float32x2_t lane vector (the 128-bit form is
   the laneq variant) — confirm against the ACLE spec.  Behavior is
   preserved here.  */
#define vmuls_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32_t ret_; \
       __asm__ ("fmul %s0,%s1,%2.s[%3]" \
                : "=w" (ret_) \
                : "w" (a_), "w" (b_), "i" (c) \
                : /* No clobbers */); \
       ret_; \
     })
10841 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10842 vmulx_f32 (float32x2_t a, float32x2_t b)
10844 float32x2_t result;
10845 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
10846 : "=w"(result)
10847 : "w"(a), "w"(b)
10848 : /* No clobbers */);
10849 return result;
10852 #define vmulx_lane_f32(a, b, c) \
10853 __extension__ \
10854 ({ \
10855 float32x4_t b_ = (b); \
10856 float32x2_t a_ = (a); \
10857 float32x2_t result; \
10858 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
10859 : "=w"(result) \
10860 : "w"(a_), "w"(b_), "i"(c) \
10861 : /* No clobbers */); \
10862 result; \
10865 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10866 vmulxd_f64 (float64_t a, float64_t b)
10868 float64_t result;
10869 __asm__ ("fmulx %d0, %d1, %d2"
10870 : "=w"(result)
10871 : "w"(a), "w"(b)
10872 : /* No clobbers */);
10873 return result;
10876 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10877 vmulxq_f32 (float32x4_t a, float32x4_t b)
10879 float32x4_t result;
10880 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
10881 : "=w"(result)
10882 : "w"(a), "w"(b)
10883 : /* No clobbers */);
10884 return result;
10887 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10888 vmulxq_f64 (float64x2_t a, float64x2_t b)
10890 float64x2_t result;
10891 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
10892 : "=w"(result)
10893 : "w"(a), "w"(b)
10894 : /* No clobbers */);
10895 return result;
10898 #define vmulxq_lane_f32(a, b, c) \
10899 __extension__ \
10900 ({ \
10901 float32x4_t b_ = (b); \
10902 float32x4_t a_ = (a); \
10903 float32x4_t result; \
10904 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
10905 : "=w"(result) \
10906 : "w"(a_), "w"(b_), "i"(c) \
10907 : /* No clobbers */); \
10908 result; \
10911 #define vmulxq_lane_f64(a, b, c) \
10912 __extension__ \
10913 ({ \
10914 float64x2_t b_ = (b); \
10915 float64x2_t a_ = (a); \
10916 float64x2_t result; \
10917 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
10918 : "=w"(result) \
10919 : "w"(a_), "w"(b_), "i"(c) \
10920 : /* No clobbers */); \
10921 result; \
10924 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10925 vmulxs_f32 (float32_t a, float32_t b)
10927 float32_t result;
10928 __asm__ ("fmulx %s0, %s1, %s2"
10929 : "=w"(result)
10930 : "w"(a), "w"(b)
10931 : /* No clobbers */);
10932 return result;
10935 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10936 vmvn_p8 (poly8x8_t a)
10938 poly8x8_t result;
10939 __asm__ ("mvn %0.8b,%1.8b"
10940 : "=w"(result)
10941 : "w"(a)
10942 : /* No clobbers */);
10943 return result;
10946 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10947 vmvn_s8 (int8x8_t a)
10949 int8x8_t result;
10950 __asm__ ("mvn %0.8b,%1.8b"
10951 : "=w"(result)
10952 : "w"(a)
10953 : /* No clobbers */);
10954 return result;
10957 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10958 vmvn_s16 (int16x4_t a)
10960 int16x4_t result;
10961 __asm__ ("mvn %0.8b,%1.8b"
10962 : "=w"(result)
10963 : "w"(a)
10964 : /* No clobbers */);
10965 return result;
10968 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10969 vmvn_s32 (int32x2_t a)
10971 int32x2_t result;
10972 __asm__ ("mvn %0.8b,%1.8b"
10973 : "=w"(result)
10974 : "w"(a)
10975 : /* No clobbers */);
10976 return result;
10979 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10980 vmvn_u8 (uint8x8_t a)
10982 uint8x8_t result;
10983 __asm__ ("mvn %0.8b,%1.8b"
10984 : "=w"(result)
10985 : "w"(a)
10986 : /* No clobbers */);
10987 return result;
10990 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10991 vmvn_u16 (uint16x4_t a)
10993 uint16x4_t result;
10994 __asm__ ("mvn %0.8b,%1.8b"
10995 : "=w"(result)
10996 : "w"(a)
10997 : /* No clobbers */);
10998 return result;
11001 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11002 vmvn_u32 (uint32x2_t a)
11004 uint32x2_t result;
11005 __asm__ ("mvn %0.8b,%1.8b"
11006 : "=w"(result)
11007 : "w"(a)
11008 : /* No clobbers */);
11009 return result;
11012 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11013 vmvnq_p8 (poly8x16_t a)
11015 poly8x16_t result;
11016 __asm__ ("mvn %0.16b,%1.16b"
11017 : "=w"(result)
11018 : "w"(a)
11019 : /* No clobbers */);
11020 return result;
11023 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11024 vmvnq_s8 (int8x16_t a)
11026 int8x16_t result;
11027 __asm__ ("mvn %0.16b,%1.16b"
11028 : "=w"(result)
11029 : "w"(a)
11030 : /* No clobbers */);
11031 return result;
11034 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11035 vmvnq_s16 (int16x8_t a)
11037 int16x8_t result;
11038 __asm__ ("mvn %0.16b,%1.16b"
11039 : "=w"(result)
11040 : "w"(a)
11041 : /* No clobbers */);
11042 return result;
11045 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11046 vmvnq_s32 (int32x4_t a)
11048 int32x4_t result;
11049 __asm__ ("mvn %0.16b,%1.16b"
11050 : "=w"(result)
11051 : "w"(a)
11052 : /* No clobbers */);
11053 return result;
11056 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11057 vmvnq_u8 (uint8x16_t a)
11059 uint8x16_t result;
11060 __asm__ ("mvn %0.16b,%1.16b"
11061 : "=w"(result)
11062 : "w"(a)
11063 : /* No clobbers */);
11064 return result;
11067 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11068 vmvnq_u16 (uint16x8_t a)
11070 uint16x8_t result;
11071 __asm__ ("mvn %0.16b,%1.16b"
11072 : "=w"(result)
11073 : "w"(a)
11074 : /* No clobbers */);
11075 return result;
11078 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11079 vmvnq_u32 (uint32x4_t a)
11081 uint32x4_t result;
11082 __asm__ ("mvn %0.16b,%1.16b"
11083 : "=w"(result)
11084 : "w"(a)
11085 : /* No clobbers */);
11086 return result;
11089 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11090 vneg_f32 (float32x2_t a)
11092 float32x2_t result;
11093 __asm__ ("fneg %0.2s,%1.2s"
11094 : "=w"(result)
11095 : "w"(a)
11096 : /* No clobbers */);
11097 return result;
11100 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11101 vneg_s8 (int8x8_t a)
11103 int8x8_t result;
11104 __asm__ ("neg %0.8b,%1.8b"
11105 : "=w"(result)
11106 : "w"(a)
11107 : /* No clobbers */);
11108 return result;
11111 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11112 vneg_s16 (int16x4_t a)
11114 int16x4_t result;
11115 __asm__ ("neg %0.4h,%1.4h"
11116 : "=w"(result)
11117 : "w"(a)
11118 : /* No clobbers */);
11119 return result;
11122 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11123 vneg_s32 (int32x2_t a)
11125 int32x2_t result;
11126 __asm__ ("neg %0.2s,%1.2s"
11127 : "=w"(result)
11128 : "w"(a)
11129 : /* No clobbers */);
11130 return result;
11133 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11134 vnegq_f32 (float32x4_t a)
11136 float32x4_t result;
11137 __asm__ ("fneg %0.4s,%1.4s"
11138 : "=w"(result)
11139 : "w"(a)
11140 : /* No clobbers */);
11141 return result;
11144 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11145 vnegq_f64 (float64x2_t a)
11147 float64x2_t result;
11148 __asm__ ("fneg %0.2d,%1.2d"
11149 : "=w"(result)
11150 : "w"(a)
11151 : /* No clobbers */);
11152 return result;
11155 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11156 vnegq_s8 (int8x16_t a)
11158 int8x16_t result;
11159 __asm__ ("neg %0.16b,%1.16b"
11160 : "=w"(result)
11161 : "w"(a)
11162 : /* No clobbers */);
11163 return result;
11166 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11167 vnegq_s16 (int16x8_t a)
11169 int16x8_t result;
11170 __asm__ ("neg %0.8h,%1.8h"
11171 : "=w"(result)
11172 : "w"(a)
11173 : /* No clobbers */);
11174 return result;
11177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11178 vnegq_s32 (int32x4_t a)
11180 int32x4_t result;
11181 __asm__ ("neg %0.4s,%1.4s"
11182 : "=w"(result)
11183 : "w"(a)
11184 : /* No clobbers */);
11185 return result;
11188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11189 vnegq_s64 (int64x2_t a)
11191 int64x2_t result;
11192 __asm__ ("neg %0.2d,%1.2d"
11193 : "=w"(result)
11194 : "w"(a)
11195 : /* No clobbers */);
11196 return result;
11199 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11200 vpadal_s8 (int16x4_t a, int8x8_t b)
11202 int16x4_t result;
11203 __asm__ ("sadalp %0.4h,%2.8b"
11204 : "=w"(result)
11205 : "0"(a), "w"(b)
11206 : /* No clobbers */);
11207 return result;
11210 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11211 vpadal_s16 (int32x2_t a, int16x4_t b)
11213 int32x2_t result;
11214 __asm__ ("sadalp %0.2s,%2.4h"
11215 : "=w"(result)
11216 : "0"(a), "w"(b)
11217 : /* No clobbers */);
11218 return result;
11221 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11222 vpadal_s32 (int64x1_t a, int32x2_t b)
11224 int64x1_t result;
11225 __asm__ ("sadalp %0.1d,%2.2s"
11226 : "=w"(result)
11227 : "0"(a), "w"(b)
11228 : /* No clobbers */);
11229 return result;
11232 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11233 vpadal_u8 (uint16x4_t a, uint8x8_t b)
11235 uint16x4_t result;
11236 __asm__ ("uadalp %0.4h,%2.8b"
11237 : "=w"(result)
11238 : "0"(a), "w"(b)
11239 : /* No clobbers */);
11240 return result;
11243 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11244 vpadal_u16 (uint32x2_t a, uint16x4_t b)
11246 uint32x2_t result;
11247 __asm__ ("uadalp %0.2s,%2.4h"
11248 : "=w"(result)
11249 : "0"(a), "w"(b)
11250 : /* No clobbers */);
11251 return result;
11254 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11255 vpadal_u32 (uint64x1_t a, uint32x2_t b)
11257 uint64x1_t result;
11258 __asm__ ("uadalp %0.1d,%2.2s"
11259 : "=w"(result)
11260 : "0"(a), "w"(b)
11261 : /* No clobbers */);
11262 return result;
11265 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11266 vpadalq_s8 (int16x8_t a, int8x16_t b)
11268 int16x8_t result;
11269 __asm__ ("sadalp %0.8h,%2.16b"
11270 : "=w"(result)
11271 : "0"(a), "w"(b)
11272 : /* No clobbers */);
11273 return result;
11276 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11277 vpadalq_s16 (int32x4_t a, int16x8_t b)
11279 int32x4_t result;
11280 __asm__ ("sadalp %0.4s,%2.8h"
11281 : "=w"(result)
11282 : "0"(a), "w"(b)
11283 : /* No clobbers */);
11284 return result;
11287 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11288 vpadalq_s32 (int64x2_t a, int32x4_t b)
11290 int64x2_t result;
11291 __asm__ ("sadalp %0.2d,%2.4s"
11292 : "=w"(result)
11293 : "0"(a), "w"(b)
11294 : /* No clobbers */);
11295 return result;
11298 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11299 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
11301 uint16x8_t result;
11302 __asm__ ("uadalp %0.8h,%2.16b"
11303 : "=w"(result)
11304 : "0"(a), "w"(b)
11305 : /* No clobbers */);
11306 return result;
11309 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11310 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
11312 uint32x4_t result;
11313 __asm__ ("uadalp %0.4s,%2.8h"
11314 : "=w"(result)
11315 : "0"(a), "w"(b)
11316 : /* No clobbers */);
11317 return result;
11320 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11321 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
11323 uint64x2_t result;
11324 __asm__ ("uadalp %0.2d,%2.4s"
11325 : "=w"(result)
11326 : "0"(a), "w"(b)
11327 : /* No clobbers */);
11328 return result;
11331 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11332 vpadd_f32 (float32x2_t a, float32x2_t b)
11334 float32x2_t result;
11335 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
11336 : "=w"(result)
11337 : "w"(a), "w"(b)
11338 : /* No clobbers */);
11339 return result;
11342 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11343 vpadd_s8 (int8x8_t __a, int8x8_t __b)
11345 return __builtin_aarch64_addpv8qi (__a, __b);
11348 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11349 vpadd_s16 (int16x4_t __a, int16x4_t __b)
11351 return __builtin_aarch64_addpv4hi (__a, __b);
11354 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11355 vpadd_s32 (int32x2_t __a, int32x2_t __b)
11357 return __builtin_aarch64_addpv2si (__a, __b);
11360 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11361 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
11363 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
11364 (int8x8_t) __b);
11367 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11368 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
11370 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
11371 (int16x4_t) __b);
11374 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11375 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
11377 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
11378 (int32x2_t) __b);
11381 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11382 vpaddd_f64 (float64x2_t a)
11384 float64_t result;
11385 __asm__ ("faddp %d0,%1.2d"
11386 : "=w"(result)
11387 : "w"(a)
11388 : /* No clobbers */);
11389 return result;
11392 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11393 vpaddl_s8 (int8x8_t a)
11395 int16x4_t result;
11396 __asm__ ("saddlp %0.4h,%1.8b"
11397 : "=w"(result)
11398 : "w"(a)
11399 : /* No clobbers */);
11400 return result;
11403 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11404 vpaddl_s16 (int16x4_t a)
11406 int32x2_t result;
11407 __asm__ ("saddlp %0.2s,%1.4h"
11408 : "=w"(result)
11409 : "w"(a)
11410 : /* No clobbers */);
11411 return result;
11414 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11415 vpaddl_s32 (int32x2_t a)
11417 int64x1_t result;
11418 __asm__ ("saddlp %0.1d,%1.2s"
11419 : "=w"(result)
11420 : "w"(a)
11421 : /* No clobbers */);
11422 return result;
11425 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11426 vpaddl_u8 (uint8x8_t a)
11428 uint16x4_t result;
11429 __asm__ ("uaddlp %0.4h,%1.8b"
11430 : "=w"(result)
11431 : "w"(a)
11432 : /* No clobbers */);
11433 return result;
11436 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11437 vpaddl_u16 (uint16x4_t a)
11439 uint32x2_t result;
11440 __asm__ ("uaddlp %0.2s,%1.4h"
11441 : "=w"(result)
11442 : "w"(a)
11443 : /* No clobbers */);
11444 return result;
11447 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11448 vpaddl_u32 (uint32x2_t a)
11450 uint64x1_t result;
11451 __asm__ ("uaddlp %0.1d,%1.2s"
11452 : "=w"(result)
11453 : "w"(a)
11454 : /* No clobbers */);
11455 return result;
11458 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11459 vpaddlq_s8 (int8x16_t a)
11461 int16x8_t result;
11462 __asm__ ("saddlp %0.8h,%1.16b"
11463 : "=w"(result)
11464 : "w"(a)
11465 : /* No clobbers */);
11466 return result;
11469 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11470 vpaddlq_s16 (int16x8_t a)
11472 int32x4_t result;
11473 __asm__ ("saddlp %0.4s,%1.8h"
11474 : "=w"(result)
11475 : "w"(a)
11476 : /* No clobbers */);
11477 return result;
11480 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11481 vpaddlq_s32 (int32x4_t a)
11483 int64x2_t result;
11484 __asm__ ("saddlp %0.2d,%1.4s"
11485 : "=w"(result)
11486 : "w"(a)
11487 : /* No clobbers */);
11488 return result;
11491 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11492 vpaddlq_u8 (uint8x16_t a)
11494 uint16x8_t result;
11495 __asm__ ("uaddlp %0.8h,%1.16b"
11496 : "=w"(result)
11497 : "w"(a)
11498 : /* No clobbers */);
11499 return result;
11502 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11503 vpaddlq_u16 (uint16x8_t a)
11505 uint32x4_t result;
11506 __asm__ ("uaddlp %0.4s,%1.8h"
11507 : "=w"(result)
11508 : "w"(a)
11509 : /* No clobbers */);
11510 return result;
11513 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11514 vpaddlq_u32 (uint32x4_t a)
11516 uint64x2_t result;
11517 __asm__ ("uaddlp %0.2d,%1.4s"
11518 : "=w"(result)
11519 : "w"(a)
11520 : /* No clobbers */);
11521 return result;
11524 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11525 vpaddq_f32 (float32x4_t a, float32x4_t b)
11527 float32x4_t result;
11528 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
11529 : "=w"(result)
11530 : "w"(a), "w"(b)
11531 : /* No clobbers */);
11532 return result;
11535 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11536 vpaddq_f64 (float64x2_t a, float64x2_t b)
11538 float64x2_t result;
11539 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
11540 : "=w"(result)
11541 : "w"(a), "w"(b)
11542 : /* No clobbers */);
11543 return result;
11546 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11547 vpaddq_s8 (int8x16_t a, int8x16_t b)
11549 int8x16_t result;
11550 __asm__ ("addp %0.16b,%1.16b,%2.16b"
11551 : "=w"(result)
11552 : "w"(a), "w"(b)
11553 : /* No clobbers */);
11554 return result;
11557 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11558 vpaddq_s16 (int16x8_t a, int16x8_t b)
11560 int16x8_t result;
11561 __asm__ ("addp %0.8h,%1.8h,%2.8h"
11562 : "=w"(result)
11563 : "w"(a), "w"(b)
11564 : /* No clobbers */);
11565 return result;
11568 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11569 vpaddq_s32 (int32x4_t a, int32x4_t b)
11571 int32x4_t result;
11572 __asm__ ("addp %0.4s,%1.4s,%2.4s"
11573 : "=w"(result)
11574 : "w"(a), "w"(b)
11575 : /* No clobbers */);
11576 return result;
11579 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11580 vpaddq_s64 (int64x2_t a, int64x2_t b)
11582 int64x2_t result;
11583 __asm__ ("addp %0.2d,%1.2d,%2.2d"
11584 : "=w"(result)
11585 : "w"(a), "w"(b)
11586 : /* No clobbers */);
11587 return result;
11590 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11591 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
11593 uint8x16_t result;
11594 __asm__ ("addp %0.16b,%1.16b,%2.16b"
11595 : "=w"(result)
11596 : "w"(a), "w"(b)
11597 : /* No clobbers */);
11598 return result;
11601 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11602 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
11604 uint16x8_t result;
11605 __asm__ ("addp %0.8h,%1.8h,%2.8h"
11606 : "=w"(result)
11607 : "w"(a), "w"(b)
11608 : /* No clobbers */);
11609 return result;
11612 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11613 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
11615 uint32x4_t result;
11616 __asm__ ("addp %0.4s,%1.4s,%2.4s"
11617 : "=w"(result)
11618 : "w"(a), "w"(b)
11619 : /* No clobbers */);
11620 return result;
11623 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11624 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
11626 uint64x2_t result;
11627 __asm__ ("addp %0.2d,%1.2d,%2.2d"
11628 : "=w"(result)
11629 : "w"(a), "w"(b)
11630 : /* No clobbers */);
11631 return result;
11634 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11635 vpadds_f32 (float32x2_t a)
11637 float32_t result;
11638 __asm__ ("faddp %s0,%1.2s"
11639 : "=w"(result)
11640 : "w"(a)
11641 : /* No clobbers */);
11642 return result;
11645 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11646 vpmax_f32 (float32x2_t a, float32x2_t b)
11648 float32x2_t result;
11649 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
11650 : "=w"(result)
11651 : "w"(a), "w"(b)
11652 : /* No clobbers */);
11653 return result;
11656 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11657 vpmax_s8 (int8x8_t a, int8x8_t b)
11659 int8x8_t result;
11660 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
11661 : "=w"(result)
11662 : "w"(a), "w"(b)
11663 : /* No clobbers */);
11664 return result;
11667 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11668 vpmax_s16 (int16x4_t a, int16x4_t b)
11670 int16x4_t result;
11671 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
11672 : "=w"(result)
11673 : "w"(a), "w"(b)
11674 : /* No clobbers */);
11675 return result;
11678 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11679 vpmax_s32 (int32x2_t a, int32x2_t b)
11681 int32x2_t result;
11682 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
11683 : "=w"(result)
11684 : "w"(a), "w"(b)
11685 : /* No clobbers */);
11686 return result;
11689 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11690 vpmax_u8 (uint8x8_t a, uint8x8_t b)
11692 uint8x8_t result;
11693 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
11694 : "=w"(result)
11695 : "w"(a), "w"(b)
11696 : /* No clobbers */);
11697 return result;
11700 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11701 vpmax_u16 (uint16x4_t a, uint16x4_t b)
11703 uint16x4_t result;
11704 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
11705 : "=w"(result)
11706 : "w"(a), "w"(b)
11707 : /* No clobbers */);
11708 return result;
11711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11712 vpmax_u32 (uint32x2_t a, uint32x2_t b)
11714 uint32x2_t result;
11715 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
11716 : "=w"(result)
11717 : "w"(a), "w"(b)
11718 : /* No clobbers */);
11719 return result;
11722 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11723 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
11725 float32x2_t result;
11726 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
11727 : "=w"(result)
11728 : "w"(a), "w"(b)
11729 : /* No clobbers */);
11730 return result;
11733 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11734 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
11736 float32x4_t result;
11737 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
11738 : "=w"(result)
11739 : "w"(a), "w"(b)
11740 : /* No clobbers */);
11741 return result;
11744 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11745 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
11747 float64x2_t result;
11748 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
11749 : "=w"(result)
11750 : "w"(a), "w"(b)
11751 : /* No clobbers */);
11752 return result;
11755 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11756 vpmaxnmqd_f64 (float64x2_t a)
11758 float64_t result;
11759 __asm__ ("fmaxnmp %d0,%1.2d"
11760 : "=w"(result)
11761 : "w"(a)
11762 : /* No clobbers */);
11763 return result;
11766 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11767 vpmaxnms_f32 (float32x2_t a)
11769 float32_t result;
11770 __asm__ ("fmaxnmp %s0,%1.2s"
11771 : "=w"(result)
11772 : "w"(a)
11773 : /* No clobbers */);
11774 return result;
11777 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11778 vpmaxq_f32 (float32x4_t a, float32x4_t b)
11780 float32x4_t result;
11781 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
11782 : "=w"(result)
11783 : "w"(a), "w"(b)
11784 : /* No clobbers */);
11785 return result;
11788 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11789 vpmaxq_f64 (float64x2_t a, float64x2_t b)
11791 float64x2_t result;
11792 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
11793 : "=w"(result)
11794 : "w"(a), "w"(b)
11795 : /* No clobbers */);
11796 return result;
11799 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11800 vpmaxq_s8 (int8x16_t a, int8x16_t b)
11802 int8x16_t result;
11803 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
11804 : "=w"(result)
11805 : "w"(a), "w"(b)
11806 : /* No clobbers */);
11807 return result;
11810 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11811 vpmaxq_s16 (int16x8_t a, int16x8_t b)
11813 int16x8_t result;
11814 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
11815 : "=w"(result)
11816 : "w"(a), "w"(b)
11817 : /* No clobbers */);
11818 return result;
11821 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11822 vpmaxq_s32 (int32x4_t a, int32x4_t b)
11824 int32x4_t result;
11825 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
11826 : "=w"(result)
11827 : "w"(a), "w"(b)
11828 : /* No clobbers */);
11829 return result;
11832 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11833 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
11835 uint8x16_t result;
11836 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
11837 : "=w"(result)
11838 : "w"(a), "w"(b)
11839 : /* No clobbers */);
11840 return result;
11843 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11844 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
11846 uint16x8_t result;
11847 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
11848 : "=w"(result)
11849 : "w"(a), "w"(b)
11850 : /* No clobbers */);
11851 return result;
11854 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11855 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
11857 uint32x4_t result;
11858 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
11859 : "=w"(result)
11860 : "w"(a), "w"(b)
11861 : /* No clobbers */);
11862 return result;
11865 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11866 vpmaxqd_f64 (float64x2_t a)
11868 float64_t result;
11869 __asm__ ("fmaxp %d0,%1.2d"
11870 : "=w"(result)
11871 : "w"(a)
11872 : /* No clobbers */);
11873 return result;
11876 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11877 vpmaxs_f32 (float32x2_t a)
11879 float32_t result;
11880 __asm__ ("fmaxp %s0,%1.2s"
11881 : "=w"(result)
11882 : "w"(a)
11883 : /* No clobbers */);
11884 return result;
11887 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11888 vpmin_f32 (float32x2_t a, float32x2_t b)
11890 float32x2_t result;
11891 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
11892 : "=w"(result)
11893 : "w"(a), "w"(b)
11894 : /* No clobbers */);
11895 return result;
11898 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11899 vpmin_s8 (int8x8_t a, int8x8_t b)
11901 int8x8_t result;
11902 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
11903 : "=w"(result)
11904 : "w"(a), "w"(b)
11905 : /* No clobbers */);
11906 return result;
11909 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11910 vpmin_s16 (int16x4_t a, int16x4_t b)
11912 int16x4_t result;
11913 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
11914 : "=w"(result)
11915 : "w"(a), "w"(b)
11916 : /* No clobbers */);
11917 return result;
11920 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11921 vpmin_s32 (int32x2_t a, int32x2_t b)
11923 int32x2_t result;
11924 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
11925 : "=w"(result)
11926 : "w"(a), "w"(b)
11927 : /* No clobbers */);
11928 return result;
11931 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11932 vpmin_u8 (uint8x8_t a, uint8x8_t b)
11934 uint8x8_t result;
11935 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
11936 : "=w"(result)
11937 : "w"(a), "w"(b)
11938 : /* No clobbers */);
11939 return result;
11942 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11943 vpmin_u16 (uint16x4_t a, uint16x4_t b)
11945 uint16x4_t result;
11946 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
11947 : "=w"(result)
11948 : "w"(a), "w"(b)
11949 : /* No clobbers */);
11950 return result;
11953 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11954 vpmin_u32 (uint32x2_t a, uint32x2_t b)
11956 uint32x2_t result;
11957 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
11958 : "=w"(result)
11959 : "w"(a), "w"(b)
11960 : /* No clobbers */);
11961 return result;
11964 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11965 vpminnm_f32 (float32x2_t a, float32x2_t b)
11967 float32x2_t result;
11968 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
11969 : "=w"(result)
11970 : "w"(a), "w"(b)
11971 : /* No clobbers */);
11972 return result;
11975 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11976 vpminnmq_f32 (float32x4_t a, float32x4_t b)
11978 float32x4_t result;
11979 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
11980 : "=w"(result)
11981 : "w"(a), "w"(b)
11982 : /* No clobbers */);
11983 return result;
11986 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11987 vpminnmq_f64 (float64x2_t a, float64x2_t b)
11989 float64x2_t result;
11990 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
11991 : "=w"(result)
11992 : "w"(a), "w"(b)
11993 : /* No clobbers */);
11994 return result;
11997 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11998 vpminnmqd_f64 (float64x2_t a)
12000 float64_t result;
12001 __asm__ ("fminnmp %d0,%1.2d"
12002 : "=w"(result)
12003 : "w"(a)
12004 : /* No clobbers */);
12005 return result;
12008 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12009 vpminnms_f32 (float32x2_t a)
12011 float32_t result;
12012 __asm__ ("fminnmp %s0,%1.2s"
12013 : "=w"(result)
12014 : "w"(a)
12015 : /* No clobbers */);
12016 return result;
12019 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12020 vpminq_f32 (float32x4_t a, float32x4_t b)
12022 float32x4_t result;
12023 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
12024 : "=w"(result)
12025 : "w"(a), "w"(b)
12026 : /* No clobbers */);
12027 return result;
12030 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12031 vpminq_f64 (float64x2_t a, float64x2_t b)
12033 float64x2_t result;
12034 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
12035 : "=w"(result)
12036 : "w"(a), "w"(b)
12037 : /* No clobbers */);
12038 return result;
12041 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12042 vpminq_s8 (int8x16_t a, int8x16_t b)
12044 int8x16_t result;
12045 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
12046 : "=w"(result)
12047 : "w"(a), "w"(b)
12048 : /* No clobbers */);
12049 return result;
12052 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12053 vpminq_s16 (int16x8_t a, int16x8_t b)
12055 int16x8_t result;
12056 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
12057 : "=w"(result)
12058 : "w"(a), "w"(b)
12059 : /* No clobbers */);
12060 return result;
12063 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12064 vpminq_s32 (int32x4_t a, int32x4_t b)
12066 int32x4_t result;
12067 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
12068 : "=w"(result)
12069 : "w"(a), "w"(b)
12070 : /* No clobbers */);
12071 return result;
12074 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12075 vpminq_u8 (uint8x16_t a, uint8x16_t b)
12077 uint8x16_t result;
12078 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
12079 : "=w"(result)
12080 : "w"(a), "w"(b)
12081 : /* No clobbers */);
12082 return result;
12085 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12086 vpminq_u16 (uint16x8_t a, uint16x8_t b)
12088 uint16x8_t result;
12089 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
12090 : "=w"(result)
12091 : "w"(a), "w"(b)
12092 : /* No clobbers */);
12093 return result;
12096 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12097 vpminq_u32 (uint32x4_t a, uint32x4_t b)
12099 uint32x4_t result;
12100 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
12101 : "=w"(result)
12102 : "w"(a), "w"(b)
12103 : /* No clobbers */);
12104 return result;
12107 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12108 vpminqd_f64 (float64x2_t a)
12110 float64_t result;
12111 __asm__ ("fminp %d0,%1.2d"
12112 : "=w"(result)
12113 : "w"(a)
12114 : /* No clobbers */);
12115 return result;
12118 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12119 vpmins_f32 (float32x2_t a)
12121 float32_t result;
12122 __asm__ ("fminp %s0,%1.2s"
12123 : "=w"(result)
12124 : "w"(a)
12125 : /* No clobbers */);
12126 return result;
12129 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12130 vqdmulh_n_s16 (int16x4_t a, int16_t b)
12132 int16x4_t result;
12133 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
12134 : "=w"(result)
12135 : "w"(a), "w"(b)
12136 : /* No clobbers */);
12137 return result;
12140 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12141 vqdmulh_n_s32 (int32x2_t a, int32_t b)
12143 int32x2_t result;
12144 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
12145 : "=w"(result)
12146 : "w"(a), "w"(b)
12147 : /* No clobbers */);
12148 return result;
12151 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12152 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
12154 int16x8_t result;
12155 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
12156 : "=w"(result)
12157 : "w"(a), "w"(b)
12158 : /* No clobbers */);
12159 return result;
12162 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12163 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
12165 int32x4_t result;
12166 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
12167 : "=w"(result)
12168 : "w"(a), "w"(b)
12169 : /* No clobbers */);
12170 return result;
12173 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12174 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
12176 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
12177 __asm__ ("sqxtn2 %0.16b, %1.8h"
12178 : "+w"(result)
12179 : "w"(b)
12180 : /* No clobbers */);
12181 return result;
12184 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12185 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
12187 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
12188 __asm__ ("sqxtn2 %0.8h, %1.4s"
12189 : "+w"(result)
12190 : "w"(b)
12191 : /* No clobbers */);
12192 return result;
12195 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12196 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
12198 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
12199 __asm__ ("sqxtn2 %0.4s, %1.2d"
12200 : "+w"(result)
12201 : "w"(b)
12202 : /* No clobbers */);
12203 return result;
12206 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12207 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
12209 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
12210 __asm__ ("uqxtn2 %0.16b, %1.8h"
12211 : "+w"(result)
12212 : "w"(b)
12213 : /* No clobbers */);
12214 return result;
12217 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12218 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
12220 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
12221 __asm__ ("uqxtn2 %0.8h, %1.4s"
12222 : "+w"(result)
12223 : "w"(b)
12224 : /* No clobbers */);
12225 return result;
12228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12229 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
12231 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
12232 __asm__ ("uqxtn2 %0.4s, %1.2d"
12233 : "+w"(result)
12234 : "w"(b)
12235 : /* No clobbers */);
12236 return result;
12239 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12240 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
12242 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
12243 __asm__ ("sqxtun2 %0.16b, %1.8h"
12244 : "+w"(result)
12245 : "w"(b)
12246 : /* No clobbers */);
12247 return result;
12250 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12251 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
12253 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
12254 __asm__ ("sqxtun2 %0.8h, %1.4s"
12255 : "+w"(result)
12256 : "w"(b)
12257 : /* No clobbers */);
12258 return result;
12261 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12262 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
12264 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
12265 __asm__ ("sqxtun2 %0.4s, %1.2d"
12266 : "+w"(result)
12267 : "w"(b)
12268 : /* No clobbers */);
12269 return result;
12272 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12273 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
12275 int16x4_t result;
12276 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
12277 : "=w"(result)
12278 : "w"(a), "w"(b)
12279 : /* No clobbers */);
12280 return result;
/* vqrdmulh_n_s32: saturating rounding doubling multiply-high of each
   lane of A by scalar B (SQRDMULH by element, lane 0).  The 32-bit
   by-element encoding has a full 5-bit Vm field, so the plain "w"
   constraint (V0-V31) is correct here, unlike the 16-bit variants.  */
12283 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12284 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
12286 int32x2_t result;
12287 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
12288 : "=w"(result)
12289 : "w"(a), "w"(b)
12290 : /* No clobbers */);
12291 return result;
/* vqrdmulhq_n_s16: 128-bit variant of vqrdmulh_n_s16 (SQRDMULH by
   element over 8 halfword lanes).
   As with the 64-bit variant, B needs the "x" constraint: the 16-bit
   by-element encoding restricts the multiplier to V0-V15 (4-bit Vm
   field), which "w" does not guarantee.  */
12294 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12295 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
12297 int16x8_t result;
12298 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
12299 : "=w"(result)
12300 : "w"(a), "x"(b)
12301 : /* No clobbers */);
12302 return result;
/* vqrdmulhq_n_s32: 128-bit saturating rounding doubling multiply-high
   by scalar (SQRDMULH by element over 4 word lanes).  32-bit element
   form encodes all of V0-V31, so "w" is the right constraint.  */
12305 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12306 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
12308 int32x4_t result;
12309 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
12310 : "=w"(result)
12311 : "w"(a), "w"(b)
12312 : /* No clobbers */);
12313 return result;
/* Saturating rounding shift-right-narrow into the HIGH half, with an
   immediate shift count.  These must be macros (not inline functions)
   because C is the shift amount and the [US]QRSHRN2 / SQRSHRUN2
   instructions take it as an immediate ("i" constraint).  Pattern for
   the whole family: widen A into the low half of RESULT (high half
   zeroed), then the *2 instruction fills the upper half from B
   narrowed by #C.  "+w" because only the upper half is written.  */
12316 #define vqrshrn_high_n_s16(a, b, c) \
12317 __extension__ \
12318 ({ \
12319 int16x8_t b_ = (b); \
12320 int8x8_t a_ = (a); \
12321 int8x16_t result = vcombine_s8 \
12322 (a_, vcreate_s8 (UINT64_C (0x0))); \
12323 __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
12324 : "+w"(result) \
12325 : "w"(b_), "i"(c) \
12326 : /* No clobbers */); \
12327 result; \
12330 #define vqrshrn_high_n_s32(a, b, c) \
12331 __extension__ \
12332 ({ \
12333 int32x4_t b_ = (b); \
12334 int16x4_t a_ = (a); \
12335 int16x8_t result = vcombine_s16 \
12336 (a_, vcreate_s16 (UINT64_C (0x0))); \
12337 __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
12338 : "+w"(result) \
12339 : "w"(b_), "i"(c) \
12340 : /* No clobbers */); \
12341 result; \
12344 #define vqrshrn_high_n_s64(a, b, c) \
12345 __extension__ \
12346 ({ \
12347 int64x2_t b_ = (b); \
12348 int32x2_t a_ = (a); \
12349 int32x4_t result = vcombine_s32 \
12350 (a_, vcreate_s32 (UINT64_C (0x0))); \
12351 __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
12352 : "+w"(result) \
12353 : "w"(b_), "i"(c) \
12354 : /* No clobbers */); \
12355 result; \
12358 #define vqrshrn_high_n_u16(a, b, c) \
12359 __extension__ \
12360 ({ \
12361 uint16x8_t b_ = (b); \
12362 uint8x8_t a_ = (a); \
12363 uint8x16_t result = vcombine_u8 \
12364 (a_, vcreate_u8 (UINT64_C (0x0))); \
12365 __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
12366 : "+w"(result) \
12367 : "w"(b_), "i"(c) \
12368 : /* No clobbers */); \
12369 result; \
12372 #define vqrshrn_high_n_u32(a, b, c) \
12373 __extension__ \
12374 ({ \
12375 uint32x4_t b_ = (b); \
12376 uint16x4_t a_ = (a); \
12377 uint16x8_t result = vcombine_u16 \
12378 (a_, vcreate_u16 (UINT64_C (0x0))); \
12379 __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
12380 : "+w"(result) \
12381 : "w"(b_), "i"(c) \
12382 : /* No clobbers */); \
12383 result; \
12386 #define vqrshrn_high_n_u64(a, b, c) \
12387 __extension__ \
12388 ({ \
12389 uint64x2_t b_ = (b); \
12390 uint32x2_t a_ = (a); \
12391 uint32x4_t result = vcombine_u32 \
12392 (a_, vcreate_u32 (UINT64_C (0x0))); \
12393 __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
12394 : "+w"(result) \
12395 : "w"(b_), "i"(c) \
12396 : /* No clobbers */); \
12397 result; \
/* vqrshrun_*: signed input, UNSIGNED saturated narrow (SQRSHRUN2).  */
12400 #define vqrshrun_high_n_s16(a, b, c) \
12401 __extension__ \
12402 ({ \
12403 int16x8_t b_ = (b); \
12404 uint8x8_t a_ = (a); \
12405 uint8x16_t result = vcombine_u8 \
12406 (a_, vcreate_u8 (UINT64_C (0x0))); \
12407 __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
12408 : "+w"(result) \
12409 : "w"(b_), "i"(c) \
12410 : /* No clobbers */); \
12411 result; \
12414 #define vqrshrun_high_n_s32(a, b, c) \
12415 __extension__ \
12416 ({ \
12417 int32x4_t b_ = (b); \
12418 uint16x4_t a_ = (a); \
12419 uint16x8_t result = vcombine_u16 \
12420 (a_, vcreate_u16 (UINT64_C (0x0))); \
12421 __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
12422 : "+w"(result) \
12423 : "w"(b_), "i"(c) \
12424 : /* No clobbers */); \
12425 result; \
12428 #define vqrshrun_high_n_s64(a, b, c) \
12429 __extension__ \
12430 ({ \
12431 int64x2_t b_ = (b); \
12432 uint32x2_t a_ = (a); \
12433 uint32x4_t result = vcombine_u32 \
12434 (a_, vcreate_u32 (UINT64_C (0x0))); \
12435 __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
12436 : "+w"(result) \
12437 : "w"(b_), "i"(c) \
12438 : /* No clobbers */); \
12439 result; \
/* Saturating (non-rounding) shift-right-narrow into the HIGH half,
   immediate shift count.  Same macro pattern as the vqrshrn_high_n_*
   family above, but truncating instead of rounding:
   [US]QSHRN2 / SQSHRUN2.  */
12442 #define vqshrn_high_n_s16(a, b, c) \
12443 __extension__ \
12444 ({ \
12445 int16x8_t b_ = (b); \
12446 int8x8_t a_ = (a); \
12447 int8x16_t result = vcombine_s8 \
12448 (a_, vcreate_s8 (UINT64_C (0x0))); \
12449 __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
12450 : "+w"(result) \
12451 : "w"(b_), "i"(c) \
12452 : /* No clobbers */); \
12453 result; \
12456 #define vqshrn_high_n_s32(a, b, c) \
12457 __extension__ \
12458 ({ \
12459 int32x4_t b_ = (b); \
12460 int16x4_t a_ = (a); \
12461 int16x8_t result = vcombine_s16 \
12462 (a_, vcreate_s16 (UINT64_C (0x0))); \
12463 __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
12464 : "+w"(result) \
12465 : "w"(b_), "i"(c) \
12466 : /* No clobbers */); \
12467 result; \
12470 #define vqshrn_high_n_s64(a, b, c) \
12471 __extension__ \
12472 ({ \
12473 int64x2_t b_ = (b); \
12474 int32x2_t a_ = (a); \
12475 int32x4_t result = vcombine_s32 \
12476 (a_, vcreate_s32 (UINT64_C (0x0))); \
12477 __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
12478 : "+w"(result) \
12479 : "w"(b_), "i"(c) \
12480 : /* No clobbers */); \
12481 result; \
12484 #define vqshrn_high_n_u16(a, b, c) \
12485 __extension__ \
12486 ({ \
12487 uint16x8_t b_ = (b); \
12488 uint8x8_t a_ = (a); \
12489 uint8x16_t result = vcombine_u8 \
12490 (a_, vcreate_u8 (UINT64_C (0x0))); \
12491 __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
12492 : "+w"(result) \
12493 : "w"(b_), "i"(c) \
12494 : /* No clobbers */); \
12495 result; \
12498 #define vqshrn_high_n_u32(a, b, c) \
12499 __extension__ \
12500 ({ \
12501 uint32x4_t b_ = (b); \
12502 uint16x4_t a_ = (a); \
12503 uint16x8_t result = vcombine_u16 \
12504 (a_, vcreate_u16 (UINT64_C (0x0))); \
12505 __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
12506 : "+w"(result) \
12507 : "w"(b_), "i"(c) \
12508 : /* No clobbers */); \
12509 result; \
12512 #define vqshrn_high_n_u64(a, b, c) \
12513 __extension__ \
12514 ({ \
12515 uint64x2_t b_ = (b); \
12516 uint32x2_t a_ = (a); \
12517 uint32x4_t result = vcombine_u32 \
12518 (a_, vcreate_u32 (UINT64_C (0x0))); \
12519 __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
12520 : "+w"(result) \
12521 : "w"(b_), "i"(c) \
12522 : /* No clobbers */); \
12523 result; \
/* vqshrun_*: signed input, UNSIGNED saturated narrow (SQSHRUN2).  */
12526 #define vqshrun_high_n_s16(a, b, c) \
12527 __extension__ \
12528 ({ \
12529 int16x8_t b_ = (b); \
12530 uint8x8_t a_ = (a); \
12531 uint8x16_t result = vcombine_u8 \
12532 (a_, vcreate_u8 (UINT64_C (0x0))); \
12533 __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
12534 : "+w"(result) \
12535 : "w"(b_), "i"(c) \
12536 : /* No clobbers */); \
12537 result; \
12540 #define vqshrun_high_n_s32(a, b, c) \
12541 __extension__ \
12542 ({ \
12543 int32x4_t b_ = (b); \
12544 uint16x4_t a_ = (a); \
12545 uint16x8_t result = vcombine_u16 \
12546 (a_, vcreate_u16 (UINT64_C (0x0))); \
12547 __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
12548 : "+w"(result) \
12549 : "w"(b_), "i"(c) \
12550 : /* No clobbers */); \
12551 result; \
12554 #define vqshrun_high_n_s64(a, b, c) \
12555 __extension__ \
12556 ({ \
12557 int64x2_t b_ = (b); \
12558 uint32x2_t a_ = (a); \
12559 uint32x4_t result = vcombine_u32 \
12560 (a_, vcreate_u32 (UINT64_C (0x0))); \
12561 __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
12562 : "+w"(result) \
12563 : "w"(b_), "i"(c) \
12564 : /* No clobbers */); \
12565 result; \
/* Bit reversal within each byte (RBIT), 64-bit and 128-bit forms.
   Only byte element types exist, since the reversal granule is the
   byte itself.  */
12568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12569 vrbit_s8 (int8x8_t a)
12571 int8x8_t result;
12572 __asm__ ("rbit %0.8b,%1.8b"
12573 : "=w"(result)
12574 : "w"(a)
12575 : /* No clobbers */);
12576 return result;
12579 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12580 vrbit_u8 (uint8x8_t a)
12582 uint8x8_t result;
12583 __asm__ ("rbit %0.8b,%1.8b"
12584 : "=w"(result)
12585 : "w"(a)
12586 : /* No clobbers */);
12587 return result;
12590 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12591 vrbitq_s8 (int8x16_t a)
12593 int8x16_t result;
12594 __asm__ ("rbit %0.16b,%1.16b"
12595 : "=w"(result)
12596 : "w"(a)
12597 : /* No clobbers */);
12598 return result;
12601 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12602 vrbitq_u8 (uint8x16_t a)
12604 uint8x16_t result;
12605 __asm__ ("rbit %0.16b,%1.16b"
12606 : "=w"(result)
12607 : "w"(a)
12608 : /* No clobbers */);
12609 return result;
/* Unsigned reciprocal estimate per 32-bit lane (URECPE), 64-bit and
   128-bit forms.  */
12612 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12613 vrecpe_u32 (uint32x2_t a)
12615 uint32x2_t result;
12616 __asm__ ("urecpe %0.2s,%1.2s"
12617 : "=w"(result)
12618 : "w"(a)
12619 : /* No clobbers */);
12620 return result;
12623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12624 vrecpeq_u32 (uint32x4_t a)
12626 uint32x4_t result;
12627 __asm__ ("urecpe %0.4s,%1.4s"
12628 : "=w"(result)
12629 : "w"(a)
12630 : /* No clobbers */);
12631 return result;
/* REV16: reverse the order of the bytes inside each 16-bit halfword.
   Defined for byte element types only (poly/signed/unsigned), in
   64-bit and 128-bit variants.  */
12634 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12635 vrev16_p8 (poly8x8_t a)
12637 poly8x8_t result;
12638 __asm__ ("rev16 %0.8b,%1.8b"
12639 : "=w"(result)
12640 : "w"(a)
12641 : /* No clobbers */);
12642 return result;
12645 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12646 vrev16_s8 (int8x8_t a)
12648 int8x8_t result;
12649 __asm__ ("rev16 %0.8b,%1.8b"
12650 : "=w"(result)
12651 : "w"(a)
12652 : /* No clobbers */);
12653 return result;
12656 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12657 vrev16_u8 (uint8x8_t a)
12659 uint8x8_t result;
12660 __asm__ ("rev16 %0.8b,%1.8b"
12661 : "=w"(result)
12662 : "w"(a)
12663 : /* No clobbers */);
12664 return result;
12667 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12668 vrev16q_p8 (poly8x16_t a)
12670 poly8x16_t result;
12671 __asm__ ("rev16 %0.16b,%1.16b"
12672 : "=w"(result)
12673 : "w"(a)
12674 : /* No clobbers */);
12675 return result;
12678 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12679 vrev16q_s8 (int8x16_t a)
12681 int8x16_t result;
12682 __asm__ ("rev16 %0.16b,%1.16b"
12683 : "=w"(result)
12684 : "w"(a)
12685 : /* No clobbers */);
12686 return result;
12689 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12690 vrev16q_u8 (uint8x16_t a)
12692 uint8x16_t result;
12693 __asm__ ("rev16 %0.16b,%1.16b"
12694 : "=w"(result)
12695 : "w"(a)
12696 : /* No clobbers */);
12697 return result;
/* REV32: reverse the order of the 8- or 16-bit elements inside each
   32-bit word.  Defined for byte and halfword element types, in
   64-bit and 128-bit variants.  */
12700 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12701 vrev32_p8 (poly8x8_t a)
12703 poly8x8_t result;
12704 __asm__ ("rev32 %0.8b,%1.8b"
12705 : "=w"(result)
12706 : "w"(a)
12707 : /* No clobbers */);
12708 return result;
12711 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12712 vrev32_p16 (poly16x4_t a)
12714 poly16x4_t result;
12715 __asm__ ("rev32 %0.4h,%1.4h"
12716 : "=w"(result)
12717 : "w"(a)
12718 : /* No clobbers */);
12719 return result;
12722 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12723 vrev32_s8 (int8x8_t a)
12725 int8x8_t result;
12726 __asm__ ("rev32 %0.8b,%1.8b"
12727 : "=w"(result)
12728 : "w"(a)
12729 : /* No clobbers */);
12730 return result;
12733 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12734 vrev32_s16 (int16x4_t a)
12736 int16x4_t result;
12737 __asm__ ("rev32 %0.4h,%1.4h"
12738 : "=w"(result)
12739 : "w"(a)
12740 : /* No clobbers */);
12741 return result;
12744 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12745 vrev32_u8 (uint8x8_t a)
12747 uint8x8_t result;
12748 __asm__ ("rev32 %0.8b,%1.8b"
12749 : "=w"(result)
12750 : "w"(a)
12751 : /* No clobbers */);
12752 return result;
12755 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12756 vrev32_u16 (uint16x4_t a)
12758 uint16x4_t result;
12759 __asm__ ("rev32 %0.4h,%1.4h"
12760 : "=w"(result)
12761 : "w"(a)
12762 : /* No clobbers */);
12763 return result;
12766 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12767 vrev32q_p8 (poly8x16_t a)
12769 poly8x16_t result;
12770 __asm__ ("rev32 %0.16b,%1.16b"
12771 : "=w"(result)
12772 : "w"(a)
12773 : /* No clobbers */);
12774 return result;
12777 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12778 vrev32q_p16 (poly16x8_t a)
12780 poly16x8_t result;
12781 __asm__ ("rev32 %0.8h,%1.8h"
12782 : "=w"(result)
12783 : "w"(a)
12784 : /* No clobbers */);
12785 return result;
12788 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12789 vrev32q_s8 (int8x16_t a)
12791 int8x16_t result;
12792 __asm__ ("rev32 %0.16b,%1.16b"
12793 : "=w"(result)
12794 : "w"(a)
12795 : /* No clobbers */);
12796 return result;
12799 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12800 vrev32q_s16 (int16x8_t a)
12802 int16x8_t result;
12803 __asm__ ("rev32 %0.8h,%1.8h"
12804 : "=w"(result)
12805 : "w"(a)
12806 : /* No clobbers */);
12807 return result;
12810 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12811 vrev32q_u8 (uint8x16_t a)
12813 uint8x16_t result;
12814 __asm__ ("rev32 %0.16b,%1.16b"
12815 : "=w"(result)
12816 : "w"(a)
12817 : /* No clobbers */);
12818 return result;
12821 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12822 vrev32q_u16 (uint16x8_t a)
12824 uint16x8_t result;
12825 __asm__ ("rev32 %0.8h,%1.8h"
12826 : "=w"(result)
12827 : "w"(a)
12828 : /* No clobbers */);
12829 return result;
/* REV64: reverse the order of the 8-, 16- or 32-bit elements inside
   each 64-bit doubleword.  Defined for byte, halfword and word
   element types (including float32), in 64-bit and 128-bit
   variants.  */
12832 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12833 vrev64_f32 (float32x2_t a)
12835 float32x2_t result;
12836 __asm__ ("rev64 %0.2s,%1.2s"
12837 : "=w"(result)
12838 : "w"(a)
12839 : /* No clobbers */);
12840 return result;
12843 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12844 vrev64_p8 (poly8x8_t a)
12846 poly8x8_t result;
12847 __asm__ ("rev64 %0.8b,%1.8b"
12848 : "=w"(result)
12849 : "w"(a)
12850 : /* No clobbers */);
12851 return result;
12854 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12855 vrev64_p16 (poly16x4_t a)
12857 poly16x4_t result;
12858 __asm__ ("rev64 %0.4h,%1.4h"
12859 : "=w"(result)
12860 : "w"(a)
12861 : /* No clobbers */);
12862 return result;
12865 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12866 vrev64_s8 (int8x8_t a)
12868 int8x8_t result;
12869 __asm__ ("rev64 %0.8b,%1.8b"
12870 : "=w"(result)
12871 : "w"(a)
12872 : /* No clobbers */);
12873 return result;
12876 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12877 vrev64_s16 (int16x4_t a)
12879 int16x4_t result;
12880 __asm__ ("rev64 %0.4h,%1.4h"
12881 : "=w"(result)
12882 : "w"(a)
12883 : /* No clobbers */);
12884 return result;
12887 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12888 vrev64_s32 (int32x2_t a)
12890 int32x2_t result;
12891 __asm__ ("rev64 %0.2s,%1.2s"
12892 : "=w"(result)
12893 : "w"(a)
12894 : /* No clobbers */);
12895 return result;
12898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12899 vrev64_u8 (uint8x8_t a)
12901 uint8x8_t result;
12902 __asm__ ("rev64 %0.8b,%1.8b"
12903 : "=w"(result)
12904 : "w"(a)
12905 : /* No clobbers */);
12906 return result;
12909 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12910 vrev64_u16 (uint16x4_t a)
12912 uint16x4_t result;
12913 __asm__ ("rev64 %0.4h,%1.4h"
12914 : "=w"(result)
12915 : "w"(a)
12916 : /* No clobbers */);
12917 return result;
12920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12921 vrev64_u32 (uint32x2_t a)
12923 uint32x2_t result;
12924 __asm__ ("rev64 %0.2s,%1.2s"
12925 : "=w"(result)
12926 : "w"(a)
12927 : /* No clobbers */);
12928 return result;
12931 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12932 vrev64q_f32 (float32x4_t a)
12934 float32x4_t result;
12935 __asm__ ("rev64 %0.4s,%1.4s"
12936 : "=w"(result)
12937 : "w"(a)
12938 : /* No clobbers */);
12939 return result;
12942 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12943 vrev64q_p8 (poly8x16_t a)
12945 poly8x16_t result;
12946 __asm__ ("rev64 %0.16b,%1.16b"
12947 : "=w"(result)
12948 : "w"(a)
12949 : /* No clobbers */);
12950 return result;
12953 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12954 vrev64q_p16 (poly16x8_t a)
12956 poly16x8_t result;
12957 __asm__ ("rev64 %0.8h,%1.8h"
12958 : "=w"(result)
12959 : "w"(a)
12960 : /* No clobbers */);
12961 return result;
12964 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12965 vrev64q_s8 (int8x16_t a)
12967 int8x16_t result;
12968 __asm__ ("rev64 %0.16b,%1.16b"
12969 : "=w"(result)
12970 : "w"(a)
12971 : /* No clobbers */);
12972 return result;
12975 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12976 vrev64q_s16 (int16x8_t a)
12978 int16x8_t result;
12979 __asm__ ("rev64 %0.8h,%1.8h"
12980 : "=w"(result)
12981 : "w"(a)
12982 : /* No clobbers */);
12983 return result;
12986 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12987 vrev64q_s32 (int32x4_t a)
12989 int32x4_t result;
12990 __asm__ ("rev64 %0.4s,%1.4s"
12991 : "=w"(result)
12992 : "w"(a)
12993 : /* No clobbers */);
12994 return result;
12997 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12998 vrev64q_u8 (uint8x16_t a)
13000 uint8x16_t result;
13001 __asm__ ("rev64 %0.16b,%1.16b"
13002 : "=w"(result)
13003 : "w"(a)
13004 : /* No clobbers */);
13005 return result;
13008 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13009 vrev64q_u16 (uint16x8_t a)
13011 uint16x8_t result;
13012 __asm__ ("rev64 %0.8h,%1.8h"
13013 : "=w"(result)
13014 : "w"(a)
13015 : /* No clobbers */);
13016 return result;
13019 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13020 vrev64q_u32 (uint32x4_t a)
13022 uint32x4_t result;
13023 __asm__ ("rev64 %0.4s,%1.4s"
13024 : "=w"(result)
13025 : "w"(a)
13026 : /* No clobbers */);
13027 return result;
/* Rounding shift-right-narrow into the HIGH half (RSHRN2), immediate
   shift count — non-saturating counterpart of vqrshrn_high_n_*.
   Macros because C must be an "i" (immediate) operand; "+w" because
   RSHRN2 writes only the upper half of the destination.  */
13030 #define vrshrn_high_n_s16(a, b, c) \
13031 __extension__ \
13032 ({ \
13033 int16x8_t b_ = (b); \
13034 int8x8_t a_ = (a); \
13035 int8x16_t result = vcombine_s8 \
13036 (a_, vcreate_s8 (UINT64_C (0x0))); \
13037 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
13038 : "+w"(result) \
13039 : "w"(b_), "i"(c) \
13040 : /* No clobbers */); \
13041 result; \
13044 #define vrshrn_high_n_s32(a, b, c) \
13045 __extension__ \
13046 ({ \
13047 int32x4_t b_ = (b); \
13048 int16x4_t a_ = (a); \
13049 int16x8_t result = vcombine_s16 \
13050 (a_, vcreate_s16 (UINT64_C (0x0))); \
13051 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
13052 : "+w"(result) \
13053 : "w"(b_), "i"(c) \
13054 : /* No clobbers */); \
13055 result; \
13058 #define vrshrn_high_n_s64(a, b, c) \
13059 __extension__ \
13060 ({ \
13061 int64x2_t b_ = (b); \
13062 int32x2_t a_ = (a); \
13063 int32x4_t result = vcombine_s32 \
13064 (a_, vcreate_s32 (UINT64_C (0x0))); \
13065 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
13066 : "+w"(result) \
13067 : "w"(b_), "i"(c) \
13068 : /* No clobbers */); \
13069 result; \
13072 #define vrshrn_high_n_u16(a, b, c) \
13073 __extension__ \
13074 ({ \
13075 uint16x8_t b_ = (b); \
13076 uint8x8_t a_ = (a); \
13077 uint8x16_t result = vcombine_u8 \
13078 (a_, vcreate_u8 (UINT64_C (0x0))); \
13079 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
13080 : "+w"(result) \
13081 : "w"(b_), "i"(c) \
13082 : /* No clobbers */); \
13083 result; \
13086 #define vrshrn_high_n_u32(a, b, c) \
13087 __extension__ \
13088 ({ \
13089 uint32x4_t b_ = (b); \
13090 uint16x4_t a_ = (a); \
13091 uint16x8_t result = vcombine_u16 \
13092 (a_, vcreate_u16 (UINT64_C (0x0))); \
13093 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
13094 : "+w"(result) \
13095 : "w"(b_), "i"(c) \
13096 : /* No clobbers */); \
13097 result; \
13100 #define vrshrn_high_n_u64(a, b, c) \
13101 __extension__ \
13102 ({ \
13103 uint64x2_t b_ = (b); \
13104 uint32x2_t a_ = (a); \
13105 uint32x4_t result = vcombine_u32 \
13106 (a_, vcreate_u32 (UINT64_C (0x0))); \
13107 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
13108 : "+w"(result) \
13109 : "w"(b_), "i"(c) \
13110 : /* No clobbers */); \
13111 result; \
/* Rounding shift-right-narrow to a 64-bit result (RSHRN), immediate
   shift count B.  Plain "=w" output: the whole destination is
   written, unlike the *_high_* variants above.  */
13114 #define vrshrn_n_s16(a, b) \
13115 __extension__ \
13116 ({ \
13117 int16x8_t a_ = (a); \
13118 int8x8_t result; \
13119 __asm__ ("rshrn %0.8b,%1.8h,%2" \
13120 : "=w"(result) \
13121 : "w"(a_), "i"(b) \
13122 : /* No clobbers */); \
13123 result; \
13126 #define vrshrn_n_s32(a, b) \
13127 __extension__ \
13128 ({ \
13129 int32x4_t a_ = (a); \
13130 int16x4_t result; \
13131 __asm__ ("rshrn %0.4h,%1.4s,%2" \
13132 : "=w"(result) \
13133 : "w"(a_), "i"(b) \
13134 : /* No clobbers */); \
13135 result; \
13138 #define vrshrn_n_s64(a, b) \
13139 __extension__ \
13140 ({ \
13141 int64x2_t a_ = (a); \
13142 int32x2_t result; \
13143 __asm__ ("rshrn %0.2s,%1.2d,%2" \
13144 : "=w"(result) \
13145 : "w"(a_), "i"(b) \
13146 : /* No clobbers */); \
13147 result; \
13150 #define vrshrn_n_u16(a, b) \
13151 __extension__ \
13152 ({ \
13153 uint16x8_t a_ = (a); \
13154 uint8x8_t result; \
13155 __asm__ ("rshrn %0.8b,%1.8h,%2" \
13156 : "=w"(result) \
13157 : "w"(a_), "i"(b) \
13158 : /* No clobbers */); \
13159 result; \
13162 #define vrshrn_n_u32(a, b) \
13163 __extension__ \
13164 ({ \
13165 uint32x4_t a_ = (a); \
13166 uint16x4_t result; \
13167 __asm__ ("rshrn %0.4h,%1.4s,%2" \
13168 : "=w"(result) \
13169 : "w"(a_), "i"(b) \
13170 : /* No clobbers */); \
13171 result; \
13174 #define vrshrn_n_u64(a, b) \
13175 __extension__ \
13176 ({ \
13177 uint64x2_t a_ = (a); \
13178 uint32x2_t result; \
13179 __asm__ ("rshrn %0.2s,%1.2d,%2" \
13180 : "=w"(result) \
13181 : "w"(a_), "i"(b) \
13182 : /* No clobbers */); \
13183 result; \
/* Reciprocal square-root estimate: FRSQRTE for float, URSQRTE for
   unsigned fixed-point.  Vector, scalar (%d/%s register) and 128-bit
   (q) forms.  */
13186 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13187 vrsqrte_f32 (float32x2_t a)
13189 float32x2_t result;
13190 __asm__ ("frsqrte %0.2s,%1.2s"
13191 : "=w"(result)
13192 : "w"(a)
13193 : /* No clobbers */);
13194 return result;
/* NOTE(review): this takes float64x2_t and emits the .2d form, making
   it identical to vrsqrteq_f64 below.  ACLE's vrsqrte_f64 is the
   one-element (float64x1_t) form — looks like a wrong-signature
   duplicate, but renaming/retyping would break existing callers;
   confirm against the ACLE spec before changing.  */
13197 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13198 vrsqrte_f64 (float64x2_t a)
13200 float64x2_t result;
13201 __asm__ ("frsqrte %0.2d,%1.2d"
13202 : "=w"(result)
13203 : "w"(a)
13204 : /* No clobbers */);
13205 return result;
13208 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13209 vrsqrte_u32 (uint32x2_t a)
13211 uint32x2_t result;
13212 __asm__ ("ursqrte %0.2s,%1.2s"
13213 : "=w"(result)
13214 : "w"(a)
13215 : /* No clobbers */);
13216 return result;
/* Scalar double-precision estimate (operates on the D register).  */
13219 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13220 vrsqrted_f64 (float64_t a)
13222 float64_t result;
13223 __asm__ ("frsqrte %d0,%d1"
13224 : "=w"(result)
13225 : "w"(a)
13226 : /* No clobbers */);
13227 return result;
13230 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13231 vrsqrteq_f32 (float32x4_t a)
13233 float32x4_t result;
13234 __asm__ ("frsqrte %0.4s,%1.4s"
13235 : "=w"(result)
13236 : "w"(a)
13237 : /* No clobbers */);
13238 return result;
13241 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13242 vrsqrteq_f64 (float64x2_t a)
13244 float64x2_t result;
13245 __asm__ ("frsqrte %0.2d,%1.2d"
13246 : "=w"(result)
13247 : "w"(a)
13248 : /* No clobbers */);
13249 return result;
13252 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13253 vrsqrteq_u32 (uint32x4_t a)
13255 uint32x4_t result;
13256 __asm__ ("ursqrte %0.4s,%1.4s"
13257 : "=w"(result)
13258 : "w"(a)
13259 : /* No clobbers */);
13260 return result;
/* Scalar single-precision estimate (operates on the S register).  */
13263 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13264 vrsqrtes_f32 (float32_t a)
13266 float32_t result;
13267 __asm__ ("frsqrte %s0,%s1"
13268 : "=w"(result)
13269 : "w"(a)
13270 : /* No clobbers */);
13271 return result;
/* Reciprocal square-root step (FRSQRTS): one Newton-Raphson refinement
   step, computing (3 - a*b) / 2 per lane.  Vector, scalar and 128-bit
   forms.  */
13274 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13275 vrsqrts_f32 (float32x2_t a, float32x2_t b)
13277 float32x2_t result;
13278 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
13279 : "=w"(result)
13280 : "w"(a), "w"(b)
13281 : /* No clobbers */);
13282 return result;
13285 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13286 vrsqrtsd_f64 (float64_t a, float64_t b)
13288 float64_t result;
13289 __asm__ ("frsqrts %d0,%d1,%d2"
13290 : "=w"(result)
13291 : "w"(a), "w"(b)
13292 : /* No clobbers */);
13293 return result;
13296 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13297 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
13299 float32x4_t result;
13300 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
13301 : "=w"(result)
13302 : "w"(a), "w"(b)
13303 : /* No clobbers */);
13304 return result;
13307 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13308 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
13310 float64x2_t result;
13311 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13312 : "=w"(result)
13313 : "w"(a), "w"(b)
13314 : /* No clobbers */);
13315 return result;
13318 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13319 vrsqrtss_f32 (float32_t a, float32_t b)
13321 float32_t result;
13322 __asm__ ("frsqrts %s0,%s1,%s2"
13323 : "=w"(result)
13324 : "w"(a), "w"(b)
13325 : /* No clobbers */);
13326 return result;
/* NOTE(review): "vrsrtsq_f64" appears to be a misspelled duplicate of
   vrsqrtsq_f64 above (missing 'q' in "sqrt"); ACLE defines no intrinsic
   by this name.  It emits the identical FRSQRTS instruction.  Kept
   verbatim for source compatibility with any code already using the
   misspelling — candidate for deprecation, not silent removal.  */
13329 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13330 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
13332 float64x2_t result;
13333 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13334 : "=w"(result)
13335 : "w"(a), "w"(b)
13336 : /* No clobbers */);
13337 return result;
/* Rounding subtract-and-narrow of B - C into the HIGH half of the
   widened A (RSUBHN2).  Low half comes from A via vcombine; "+w"
   because the *2 instruction preserves it.  */
13340 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13341 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
13343 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
13344 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13345 : "+w"(result)
13346 : "w"(b), "w"(c)
13347 : /* No clobbers */);
13348 return result;
13351 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13352 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
13354 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
13355 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13356 : "+w"(result)
13357 : "w"(b), "w"(c)
13358 : /* No clobbers */);
13359 return result;
13362 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13363 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
13365 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
13366 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13367 : "+w"(result)
13368 : "w"(b), "w"(c)
13369 : /* No clobbers */);
13370 return result;
13373 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13374 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
13376 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
13377 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13378 : "+w"(result)
13379 : "w"(b), "w"(c)
13380 : /* No clobbers */);
13381 return result;
13384 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13385 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
13387 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
13388 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13389 : "+w"(result)
13390 : "w"(b), "w"(c)
13391 : /* No clobbers */);
13392 return result;
13395 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13396 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
13398 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
13399 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13400 : "+w"(result)
13401 : "w"(b), "w"(c)
13402 : /* No clobbers */);
13403 return result;
/* Rounding subtract returning the high (narrowed) half of A - B
   (RSUBHN), 64-bit result forms.  */
13406 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13407 vrsubhn_s16 (int16x8_t a, int16x8_t b)
13409 int8x8_t result;
13410 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13411 : "=w"(result)
13412 : "w"(a), "w"(b)
13413 : /* No clobbers */);
13414 return result;
13417 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13418 vrsubhn_s32 (int32x4_t a, int32x4_t b)
13420 int16x4_t result;
13421 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13422 : "=w"(result)
13423 : "w"(a), "w"(b)
13424 : /* No clobbers */);
13425 return result;
13428 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13429 vrsubhn_s64 (int64x2_t a, int64x2_t b)
13431 int32x2_t result;
13432 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13433 : "=w"(result)
13434 : "w"(a), "w"(b)
13435 : /* No clobbers */);
13436 return result;
13439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13440 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
13442 uint8x8_t result;
13443 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13444 : "=w"(result)
13445 : "w"(a), "w"(b)
13446 : /* No clobbers */);
13447 return result;
13450 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13451 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
13453 uint16x4_t result;
13454 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13455 : "=w"(result)
13456 : "w"(a), "w"(b)
13457 : /* No clobbers */);
13458 return result;
13461 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13462 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
13464 uint32x2_t result;
13465 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13466 : "=w"(result)
13467 : "w"(a), "w"(b)
13468 : /* No clobbers */);
13469 return result;
/* Insert scalar A into lane C of 64-bit vector B (INS element-from-GP).
   Macros because the lane number C must be an "i" immediate.  The
   scalar travels through a general-purpose register ("r"): %w1 for
   32-bit-or-narrower elements, %x1 for 64-bit ones.  The "0"(b_) tie
   pre-loads the destination with B so unwritten lanes survive.
   NOTE(review): the float/poly variants also pass the scalar via "r";
   for float32_t that forces an FP value through a GP register —
   presumably just a bit-pattern move, but verify codegen quality.  */
13472 #define vset_lane_f32(a, b, c) \
13473 __extension__ \
13474 ({ \
13475 float32x2_t b_ = (b); \
13476 float32_t a_ = (a); \
13477 float32x2_t result; \
13478 __asm__ ("ins %0.s[%3], %w1" \
13479 : "=w"(result) \
13480 : "r"(a_), "0"(b_), "i"(c) \
13481 : /* No clobbers */); \
13482 result; \
13485 #define vset_lane_f64(a, b, c) \
13486 __extension__ \
13487 ({ \
13488 float64x1_t b_ = (b); \
13489 float64_t a_ = (a); \
13490 float64x1_t result; \
13491 __asm__ ("ins %0.d[%3], %x1" \
13492 : "=w"(result) \
13493 : "r"(a_), "0"(b_), "i"(c) \
13494 : /* No clobbers */); \
13495 result; \
13498 #define vset_lane_p8(a, b, c) \
13499 __extension__ \
13500 ({ \
13501 poly8x8_t b_ = (b); \
13502 poly8_t a_ = (a); \
13503 poly8x8_t result; \
13504 __asm__ ("ins %0.b[%3], %w1" \
13505 : "=w"(result) \
13506 : "r"(a_), "0"(b_), "i"(c) \
13507 : /* No clobbers */); \
13508 result; \
13511 #define vset_lane_p16(a, b, c) \
13512 __extension__ \
13513 ({ \
13514 poly16x4_t b_ = (b); \
13515 poly16_t a_ = (a); \
13516 poly16x4_t result; \
13517 __asm__ ("ins %0.h[%3], %w1" \
13518 : "=w"(result) \
13519 : "r"(a_), "0"(b_), "i"(c) \
13520 : /* No clobbers */); \
13521 result; \
13524 #define vset_lane_s8(a, b, c) \
13525 __extension__ \
13526 ({ \
13527 int8x8_t b_ = (b); \
13528 int8_t a_ = (a); \
13529 int8x8_t result; \
13530 __asm__ ("ins %0.b[%3], %w1" \
13531 : "=w"(result) \
13532 : "r"(a_), "0"(b_), "i"(c) \
13533 : /* No clobbers */); \
13534 result; \
13537 #define vset_lane_s16(a, b, c) \
13538 __extension__ \
13539 ({ \
13540 int16x4_t b_ = (b); \
13541 int16_t a_ = (a); \
13542 int16x4_t result; \
13543 __asm__ ("ins %0.h[%3], %w1" \
13544 : "=w"(result) \
13545 : "r"(a_), "0"(b_), "i"(c) \
13546 : /* No clobbers */); \
13547 result; \
13550 #define vset_lane_s32(a, b, c) \
13551 __extension__ \
13552 ({ \
13553 int32x2_t b_ = (b); \
13554 int32_t a_ = (a); \
13555 int32x2_t result; \
13556 __asm__ ("ins %0.s[%3], %w1" \
13557 : "=w"(result) \
13558 : "r"(a_), "0"(b_), "i"(c) \
13559 : /* No clobbers */); \
13560 result; \
13563 #define vset_lane_s64(a, b, c) \
13564 __extension__ \
13565 ({ \
13566 int64x1_t b_ = (b); \
13567 int64_t a_ = (a); \
13568 int64x1_t result; \
13569 __asm__ ("ins %0.d[%3], %x1" \
13570 : "=w"(result) \
13571 : "r"(a_), "0"(b_), "i"(c) \
13572 : /* No clobbers */); \
13573 result; \
13576 #define vset_lane_u8(a, b, c) \
13577 __extension__ \
13578 ({ \
13579 uint8x8_t b_ = (b); \
13580 uint8_t a_ = (a); \
13581 uint8x8_t result; \
13582 __asm__ ("ins %0.b[%3], %w1" \
13583 : "=w"(result) \
13584 : "r"(a_), "0"(b_), "i"(c) \
13585 : /* No clobbers */); \
13586 result; \
13589 #define vset_lane_u16(a, b, c) \
13590 __extension__ \
13591 ({ \
13592 uint16x4_t b_ = (b); \
13593 uint16_t a_ = (a); \
13594 uint16x4_t result; \
13595 __asm__ ("ins %0.h[%3], %w1" \
13596 : "=w"(result) \
13597 : "r"(a_), "0"(b_), "i"(c) \
13598 : /* No clobbers */); \
13599 result; \
13602 #define vset_lane_u32(a, b, c) \
13603 __extension__ \
13604 ({ \
13605 uint32x2_t b_ = (b); \
13606 uint32_t a_ = (a); \
13607 uint32x2_t result; \
13608 __asm__ ("ins %0.s[%3], %w1" \
13609 : "=w"(result) \
13610 : "r"(a_), "0"(b_), "i"(c) \
13611 : /* No clobbers */); \
13612 result; \
13615 #define vset_lane_u64(a, b, c) \
13616 __extension__ \
13617 ({ \
13618 uint64x1_t b_ = (b); \
13619 uint64_t a_ = (a); \
13620 uint64x1_t result; \
13621 __asm__ ("ins %0.d[%3], %x1" \
13622 : "=w"(result) \
13623 : "r"(a_), "0"(b_), "i"(c) \
13624 : /* No clobbers */); \
13625 result; \
/* vsetq_lane_<T> (a, b, c): return 128-bit vector B with scalar A
   inserted into lane C (AdvSIMD INS).  C must be a compile-time
   constant lane index.  */

#define vsetq_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_high_n_<T> (a, b, c): shift each element of B right by the
   constant C, narrow to half width, and place the result in the upper
   half of the return vector; the lower half is A (AdvSIMD SHRN2).
   A zero-extended copy of A is built with vcombine/vcreate and tied to
   the output register via the "+w" constraint.  */

#define vshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 (UINT64_C (0x0))); \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_n_<T> (a, b): shift each element of A right by the constant B
   and narrow to half element width (AdvSIMD SHRN).  */

#define vshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("shrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("shrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("shrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vsli<q>_n_<T> (a, b, c): shift each element of B left by the constant
   C and insert into A, preserving the low C bits of each element of A
   (AdvSIMD SLI).  A is tied to the output register ("0" constraint).  */

#define vsli_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sli %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsli_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sli %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sli %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsliq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sli %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vsri<q>_n_<T> (a, b, c): shift each element of B right by the
   constant C and insert into A, preserving the high C bits of each
   element of A (AdvSIMD SRI).  */

#define vsri_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("sri %0.8b,%2.8b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsri_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("sri %0.4h,%2.4h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8x16_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("sri %0.16b,%2.16b,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsriq_n_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16x8_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("sri %0.8h,%2.8h,%3" \
                : "=w"(result) \
                : "0"(a_), "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vst1_lane_<T> (a, b, c): store lane C of 64-bit vector B to the
   address A (AdvSIMD ST1, single structure).  The asm has no outputs;
   "memory" is clobbered because the store is not visible to the
   compiler through the operands.  */

#define vst1_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
/* vst1q_lane_<T> (a, b, c): store lane C of 128-bit vector B to the
   address A (AdvSIMD ST1, single structure).  */

#define vst1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
14309 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14310 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
14312 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
14313 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14314 : "+w"(result)
14315 : "w"(b), "w"(c)
14316 : /* No clobbers */);
14317 return result;
14320 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14321 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
14323 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
14324 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14325 : "+w"(result)
14326 : "w"(b), "w"(c)
14327 : /* No clobbers */);
14328 return result;
14331 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14332 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
14334 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
14335 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14336 : "+w"(result)
14337 : "w"(b), "w"(c)
14338 : /* No clobbers */);
14339 return result;
14342 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14343 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
14345 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
14346 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14347 : "+w"(result)
14348 : "w"(b), "w"(c)
14349 : /* No clobbers */);
14350 return result;
14353 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14354 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
14356 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
14357 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14358 : "+w"(result)
14359 : "w"(b), "w"(c)
14360 : /* No clobbers */);
14361 return result;
14364 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14365 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
14367 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
14368 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14369 : "+w"(result)
14370 : "w"(b), "w"(c)
14371 : /* No clobbers */);
14372 return result;
14375 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14376 vsubhn_s16 (int16x8_t a, int16x8_t b)
14378 int8x8_t result;
14379 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14380 : "=w"(result)
14381 : "w"(a), "w"(b)
14382 : /* No clobbers */);
14383 return result;
14386 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14387 vsubhn_s32 (int32x4_t a, int32x4_t b)
14389 int16x4_t result;
14390 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14391 : "=w"(result)
14392 : "w"(a), "w"(b)
14393 : /* No clobbers */);
14394 return result;
14397 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14398 vsubhn_s64 (int64x2_t a, int64x2_t b)
14400 int32x2_t result;
14401 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14402 : "=w"(result)
14403 : "w"(a), "w"(b)
14404 : /* No clobbers */);
14405 return result;
14408 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14409 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
14411 uint8x8_t result;
14412 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14413 : "=w"(result)
14414 : "w"(a), "w"(b)
14415 : /* No clobbers */);
14416 return result;
14419 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14420 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
14422 uint16x4_t result;
14423 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14424 : "=w"(result)
14425 : "w"(a), "w"(b)
14426 : /* No clobbers */);
14427 return result;
14430 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14431 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
14433 uint32x2_t result;
14434 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14435 : "=w"(result)
14436 : "w"(a), "w"(b)
14437 : /* No clobbers */);
14438 return result;
14441 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14442 vtrn1_f32 (float32x2_t a, float32x2_t b)
14444 float32x2_t result;
14445 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14446 : "=w"(result)
14447 : "w"(a), "w"(b)
14448 : /* No clobbers */);
14449 return result;
14452 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14453 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
14455 poly8x8_t result;
14456 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14457 : "=w"(result)
14458 : "w"(a), "w"(b)
14459 : /* No clobbers */);
14460 return result;
14463 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14464 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
14466 poly16x4_t result;
14467 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14468 : "=w"(result)
14469 : "w"(a), "w"(b)
14470 : /* No clobbers */);
14471 return result;
14474 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14475 vtrn1_s8 (int8x8_t a, int8x8_t b)
14477 int8x8_t result;
14478 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14479 : "=w"(result)
14480 : "w"(a), "w"(b)
14481 : /* No clobbers */);
14482 return result;
14485 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14486 vtrn1_s16 (int16x4_t a, int16x4_t b)
14488 int16x4_t result;
14489 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14490 : "=w"(result)
14491 : "w"(a), "w"(b)
14492 : /* No clobbers */);
14493 return result;
14496 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14497 vtrn1_s32 (int32x2_t a, int32x2_t b)
14499 int32x2_t result;
14500 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14501 : "=w"(result)
14502 : "w"(a), "w"(b)
14503 : /* No clobbers */);
14504 return result;
14507 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14508 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
14510 uint8x8_t result;
14511 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14512 : "=w"(result)
14513 : "w"(a), "w"(b)
14514 : /* No clobbers */);
14515 return result;
14518 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14519 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
14521 uint16x4_t result;
14522 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14523 : "=w"(result)
14524 : "w"(a), "w"(b)
14525 : /* No clobbers */);
14526 return result;
14529 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14530 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
14532 uint32x2_t result;
14533 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14534 : "=w"(result)
14535 : "w"(a), "w"(b)
14536 : /* No clobbers */);
14537 return result;
14540 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14541 vtrn1q_f32 (float32x4_t a, float32x4_t b)
14543 float32x4_t result;
14544 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14545 : "=w"(result)
14546 : "w"(a), "w"(b)
14547 : /* No clobbers */);
14548 return result;
14551 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14552 vtrn1q_f64 (float64x2_t a, float64x2_t b)
14554 float64x2_t result;
14555 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14556 : "=w"(result)
14557 : "w"(a), "w"(b)
14558 : /* No clobbers */);
14559 return result;
14562 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14563 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
14565 poly8x16_t result;
14566 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14567 : "=w"(result)
14568 : "w"(a), "w"(b)
14569 : /* No clobbers */);
14570 return result;
14573 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14574 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
14576 poly16x8_t result;
14577 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14578 : "=w"(result)
14579 : "w"(a), "w"(b)
14580 : /* No clobbers */);
14581 return result;
14584 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14585 vtrn1q_s8 (int8x16_t a, int8x16_t b)
14587 int8x16_t result;
14588 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14589 : "=w"(result)
14590 : "w"(a), "w"(b)
14591 : /* No clobbers */);
14592 return result;
14595 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14596 vtrn1q_s16 (int16x8_t a, int16x8_t b)
14598 int16x8_t result;
14599 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14600 : "=w"(result)
14601 : "w"(a), "w"(b)
14602 : /* No clobbers */);
14603 return result;
14606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14607 vtrn1q_s32 (int32x4_t a, int32x4_t b)
14609 int32x4_t result;
14610 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14611 : "=w"(result)
14612 : "w"(a), "w"(b)
14613 : /* No clobbers */);
14614 return result;
14617 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14618 vtrn1q_s64 (int64x2_t a, int64x2_t b)
14620 int64x2_t result;
14621 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14622 : "=w"(result)
14623 : "w"(a), "w"(b)
14624 : /* No clobbers */);
14625 return result;
14628 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14629 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
14631 uint8x16_t result;
14632 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14633 : "=w"(result)
14634 : "w"(a), "w"(b)
14635 : /* No clobbers */);
14636 return result;
14639 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14640 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
14642 uint16x8_t result;
14643 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14644 : "=w"(result)
14645 : "w"(a), "w"(b)
14646 : /* No clobbers */);
14647 return result;
14650 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14651 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
14653 uint32x4_t result;
14654 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14655 : "=w"(result)
14656 : "w"(a), "w"(b)
14657 : /* No clobbers */);
14658 return result;
14661 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14662 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
14664 uint64x2_t result;
14665 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14666 : "=w"(result)
14667 : "w"(a), "w"(b)
14668 : /* No clobbers */);
14669 return result;
14672 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14673 vtrn2_f32 (float32x2_t a, float32x2_t b)
14675 float32x2_t result;
14676 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14677 : "=w"(result)
14678 : "w"(a), "w"(b)
14679 : /* No clobbers */);
14680 return result;
14683 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14684 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
14686 poly8x8_t result;
14687 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14688 : "=w"(result)
14689 : "w"(a), "w"(b)
14690 : /* No clobbers */);
14691 return result;
14694 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14695 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
14697 poly16x4_t result;
14698 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14699 : "=w"(result)
14700 : "w"(a), "w"(b)
14701 : /* No clobbers */);
14702 return result;
14705 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14706 vtrn2_s8 (int8x8_t a, int8x8_t b)
14708 int8x8_t result;
14709 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14710 : "=w"(result)
14711 : "w"(a), "w"(b)
14712 : /* No clobbers */);
14713 return result;
14716 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14717 vtrn2_s16 (int16x4_t a, int16x4_t b)
14719 int16x4_t result;
14720 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14721 : "=w"(result)
14722 : "w"(a), "w"(b)
14723 : /* No clobbers */);
14724 return result;
14727 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14728 vtrn2_s32 (int32x2_t a, int32x2_t b)
14730 int32x2_t result;
14731 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14732 : "=w"(result)
14733 : "w"(a), "w"(b)
14734 : /* No clobbers */);
14735 return result;
14738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14739 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
14741 uint8x8_t result;
14742 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14743 : "=w"(result)
14744 : "w"(a), "w"(b)
14745 : /* No clobbers */);
14746 return result;
14749 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14750 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
14752 uint16x4_t result;
14753 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14754 : "=w"(result)
14755 : "w"(a), "w"(b)
14756 : /* No clobbers */);
14757 return result;
14760 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14761 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
14763 uint32x2_t result;
14764 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14765 : "=w"(result)
14766 : "w"(a), "w"(b)
14767 : /* No clobbers */);
14768 return result;
14771 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14772 vtrn2q_f32 (float32x4_t a, float32x4_t b)
14774 float32x4_t result;
14775 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14776 : "=w"(result)
14777 : "w"(a), "w"(b)
14778 : /* No clobbers */);
14779 return result;
14782 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14783 vtrn2q_f64 (float64x2_t a, float64x2_t b)
14785 float64x2_t result;
14786 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14787 : "=w"(result)
14788 : "w"(a), "w"(b)
14789 : /* No clobbers */);
14790 return result;
14793 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14794 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
14796 poly8x16_t result;
14797 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14798 : "=w"(result)
14799 : "w"(a), "w"(b)
14800 : /* No clobbers */);
14801 return result;
14804 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14805 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
14807 poly16x8_t result;
14808 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14809 : "=w"(result)
14810 : "w"(a), "w"(b)
14811 : /* No clobbers */);
14812 return result;
14815 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14816 vtrn2q_s8 (int8x16_t a, int8x16_t b)
14818 int8x16_t result;
14819 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14820 : "=w"(result)
14821 : "w"(a), "w"(b)
14822 : /* No clobbers */);
14823 return result;
14826 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14827 vtrn2q_s16 (int16x8_t a, int16x8_t b)
14829 int16x8_t result;
14830 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14831 : "=w"(result)
14832 : "w"(a), "w"(b)
14833 : /* No clobbers */);
14834 return result;
14837 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14838 vtrn2q_s32 (int32x4_t a, int32x4_t b)
14840 int32x4_t result;
14841 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14842 : "=w"(result)
14843 : "w"(a), "w"(b)
14844 : /* No clobbers */);
14845 return result;
14848 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14849 vtrn2q_s64 (int64x2_t a, int64x2_t b)
14851 int64x2_t result;
14852 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14853 : "=w"(result)
14854 : "w"(a), "w"(b)
14855 : /* No clobbers */);
14856 return result;
14859 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14860 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
14862 uint8x16_t result;
14863 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14864 : "=w"(result)
14865 : "w"(a), "w"(b)
14866 : /* No clobbers */);
14867 return result;
14870 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14871 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
14873 uint16x8_t result;
14874 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14875 : "=w"(result)
14876 : "w"(a), "w"(b)
14877 : /* No clobbers */);
14878 return result;
14881 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14882 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
14884 uint32x4_t result;
14885 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14886 : "=w"(result)
14887 : "w"(a), "w"(b)
14888 : /* No clobbers */);
14889 return result;
14892 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14893 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
14895 uint64x2_t result;
14896 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14897 : "=w"(result)
14898 : "w"(a), "w"(b)
14899 : /* No clobbers */);
14900 return result;
14903 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14904 vtst_p8 (poly8x8_t a, poly8x8_t b)
14906 uint8x8_t result;
14907 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
14908 : "=w"(result)
14909 : "w"(a), "w"(b)
14910 : /* No clobbers */);
14911 return result;
14914 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14915 vtst_p16 (poly16x4_t a, poly16x4_t b)
14917 uint16x4_t result;
14918 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
14919 : "=w"(result)
14920 : "w"(a), "w"(b)
14921 : /* No clobbers */);
14922 return result;
14925 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14926 vtstq_p8 (poly8x16_t a, poly8x16_t b)
14928 uint8x16_t result;
14929 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
14930 : "=w"(result)
14931 : "w"(a), "w"(b)
14932 : /* No clobbers */);
14933 return result;
14936 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14937 vtstq_p16 (poly16x8_t a, poly16x8_t b)
14939 uint16x8_t result;
14940 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
14941 : "=w"(result)
14942 : "w"(a), "w"(b)
14943 : /* No clobbers */);
14944 return result;
14946 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14947 vuzp1_f32 (float32x2_t a, float32x2_t b)
14949 float32x2_t result;
14950 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
14951 : "=w"(result)
14952 : "w"(a), "w"(b)
14953 : /* No clobbers */);
14954 return result;
14957 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14958 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
14960 poly8x8_t result;
14961 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
14962 : "=w"(result)
14963 : "w"(a), "w"(b)
14964 : /* No clobbers */);
14965 return result;
14968 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14969 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
14971 poly16x4_t result;
14972 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
14973 : "=w"(result)
14974 : "w"(a), "w"(b)
14975 : /* No clobbers */);
14976 return result;
14979 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14980 vuzp1_s8 (int8x8_t a, int8x8_t b)
14982 int8x8_t result;
14983 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
14984 : "=w"(result)
14985 : "w"(a), "w"(b)
14986 : /* No clobbers */);
14987 return result;
14990 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14991 vuzp1_s16 (int16x4_t a, int16x4_t b)
14993 int16x4_t result;
14994 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
14995 : "=w"(result)
14996 : "w"(a), "w"(b)
14997 : /* No clobbers */);
14998 return result;
15001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15002 vuzp1_s32 (int32x2_t a, int32x2_t b)
15004 int32x2_t result;
15005 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15006 : "=w"(result)
15007 : "w"(a), "w"(b)
15008 : /* No clobbers */);
15009 return result;
15012 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15013 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
15015 uint8x8_t result;
15016 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15017 : "=w"(result)
15018 : "w"(a), "w"(b)
15019 : /* No clobbers */);
15020 return result;
15023 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15024 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
15026 uint16x4_t result;
15027 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15028 : "=w"(result)
15029 : "w"(a), "w"(b)
15030 : /* No clobbers */);
15031 return result;
15034 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15035 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
15037 uint32x2_t result;
15038 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15039 : "=w"(result)
15040 : "w"(a), "w"(b)
15041 : /* No clobbers */);
15042 return result;
15045 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15046 vuzp1q_f32 (float32x4_t a, float32x4_t b)
15048 float32x4_t result;
15049 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15050 : "=w"(result)
15051 : "w"(a), "w"(b)
15052 : /* No clobbers */);
15053 return result;
15056 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15057 vuzp1q_f64 (float64x2_t a, float64x2_t b)
15059 float64x2_t result;
15060 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15061 : "=w"(result)
15062 : "w"(a), "w"(b)
15063 : /* No clobbers */);
15064 return result;
15067 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15068 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
15070 poly8x16_t result;
15071 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15072 : "=w"(result)
15073 : "w"(a), "w"(b)
15074 : /* No clobbers */);
15075 return result;
15078 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15079 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
15081 poly16x8_t result;
15082 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15083 : "=w"(result)
15084 : "w"(a), "w"(b)
15085 : /* No clobbers */);
15086 return result;
15089 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15090 vuzp1q_s8 (int8x16_t a, int8x16_t b)
15092 int8x16_t result;
15093 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15094 : "=w"(result)
15095 : "w"(a), "w"(b)
15096 : /* No clobbers */);
15097 return result;
15100 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15101 vuzp1q_s16 (int16x8_t a, int16x8_t b)
15103 int16x8_t result;
15104 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15105 : "=w"(result)
15106 : "w"(a), "w"(b)
15107 : /* No clobbers */);
15108 return result;
15111 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15112 vuzp1q_s32 (int32x4_t a, int32x4_t b)
15114 int32x4_t result;
15115 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15116 : "=w"(result)
15117 : "w"(a), "w"(b)
15118 : /* No clobbers */);
15119 return result;
15122 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15123 vuzp1q_s64 (int64x2_t a, int64x2_t b)
15125 int64x2_t result;
15126 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15127 : "=w"(result)
15128 : "w"(a), "w"(b)
15129 : /* No clobbers */);
15130 return result;
15133 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15134 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
15136 uint8x16_t result;
15137 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15138 : "=w"(result)
15139 : "w"(a), "w"(b)
15140 : /* No clobbers */);
15141 return result;
15144 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15145 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
15147 uint16x8_t result;
15148 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15149 : "=w"(result)
15150 : "w"(a), "w"(b)
15151 : /* No clobbers */);
15152 return result;
15155 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15156 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
15158 uint32x4_t result;
15159 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15160 : "=w"(result)
15161 : "w"(a), "w"(b)
15162 : /* No clobbers */);
15163 return result;
15166 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15167 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
15169 uint64x2_t result;
15170 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15171 : "=w"(result)
15172 : "w"(a), "w"(b)
15173 : /* No clobbers */);
15174 return result;
15177 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15178 vuzp2_f32 (float32x2_t a, float32x2_t b)
15180 float32x2_t result;
15181 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15182 : "=w"(result)
15183 : "w"(a), "w"(b)
15184 : /* No clobbers */);
15185 return result;
15188 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15189 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
15191 poly8x8_t result;
15192 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15193 : "=w"(result)
15194 : "w"(a), "w"(b)
15195 : /* No clobbers */);
15196 return result;
15199 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15200 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
15202 poly16x4_t result;
15203 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15204 : "=w"(result)
15205 : "w"(a), "w"(b)
15206 : /* No clobbers */);
15207 return result;
15210 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15211 vuzp2_s8 (int8x8_t a, int8x8_t b)
15213 int8x8_t result;
15214 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15215 : "=w"(result)
15216 : "w"(a), "w"(b)
15217 : /* No clobbers */);
15218 return result;
15221 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15222 vuzp2_s16 (int16x4_t a, int16x4_t b)
15224 int16x4_t result;
15225 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15226 : "=w"(result)
15227 : "w"(a), "w"(b)
15228 : /* No clobbers */);
15229 return result;
15232 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15233 vuzp2_s32 (int32x2_t a, int32x2_t b)
15235 int32x2_t result;
15236 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15237 : "=w"(result)
15238 : "w"(a), "w"(b)
15239 : /* No clobbers */);
15240 return result;
15243 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15244 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
15246 uint8x8_t result;
15247 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15248 : "=w"(result)
15249 : "w"(a), "w"(b)
15250 : /* No clobbers */);
15251 return result;
15254 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15255 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
15257 uint16x4_t result;
15258 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15259 : "=w"(result)
15260 : "w"(a), "w"(b)
15261 : /* No clobbers */);
15262 return result;
15265 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15266 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
15268 uint32x2_t result;
15269 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15270 : "=w"(result)
15271 : "w"(a), "w"(b)
15272 : /* No clobbers */);
15273 return result;
15276 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15277 vuzp2q_f32 (float32x4_t a, float32x4_t b)
15279 float32x4_t result;
15280 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15281 : "=w"(result)
15282 : "w"(a), "w"(b)
15283 : /* No clobbers */);
15284 return result;
15287 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15288 vuzp2q_f64 (float64x2_t a, float64x2_t b)
15290 float64x2_t result;
15291 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15292 : "=w"(result)
15293 : "w"(a), "w"(b)
15294 : /* No clobbers */);
15295 return result;
15298 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15299 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
15301 poly8x16_t result;
15302 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15303 : "=w"(result)
15304 : "w"(a), "w"(b)
15305 : /* No clobbers */);
15306 return result;
15309 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15310 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
15312 poly16x8_t result;
15313 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15314 : "=w"(result)
15315 : "w"(a), "w"(b)
15316 : /* No clobbers */);
15317 return result;
15320 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15321 vuzp2q_s8 (int8x16_t a, int8x16_t b)
15323 int8x16_t result;
15324 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15325 : "=w"(result)
15326 : "w"(a), "w"(b)
15327 : /* No clobbers */);
15328 return result;
15331 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15332 vuzp2q_s16 (int16x8_t a, int16x8_t b)
15334 int16x8_t result;
15335 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15336 : "=w"(result)
15337 : "w"(a), "w"(b)
15338 : /* No clobbers */);
15339 return result;
15342 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15343 vuzp2q_s32 (int32x4_t a, int32x4_t b)
15345 int32x4_t result;
15346 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15347 : "=w"(result)
15348 : "w"(a), "w"(b)
15349 : /* No clobbers */);
15350 return result;
15353 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15354 vuzp2q_s64 (int64x2_t a, int64x2_t b)
15356 int64x2_t result;
15357 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15358 : "=w"(result)
15359 : "w"(a), "w"(b)
15360 : /* No clobbers */);
15361 return result;
15364 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15365 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
15367 uint8x16_t result;
15368 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15369 : "=w"(result)
15370 : "w"(a), "w"(b)
15371 : /* No clobbers */);
15372 return result;
15375 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15376 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
15378 uint16x8_t result;
15379 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15380 : "=w"(result)
15381 : "w"(a), "w"(b)
15382 : /* No clobbers */);
15383 return result;
15386 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15387 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
15389 uint32x4_t result;
15390 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15391 : "=w"(result)
15392 : "w"(a), "w"(b)
15393 : /* No clobbers */);
15394 return result;
15397 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15398 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
15400 uint64x2_t result;
15401 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15402 : "=w"(result)
15403 : "w"(a), "w"(b)
15404 : /* No clobbers */);
15405 return result;
15408 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15409 vzip1_f32 (float32x2_t a, float32x2_t b)
15411 float32x2_t result;
15412 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15413 : "=w"(result)
15414 : "w"(a), "w"(b)
15415 : /* No clobbers */);
15416 return result;
15419 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15420 vzip1_p8 (poly8x8_t a, poly8x8_t b)
15422 poly8x8_t result;
15423 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15424 : "=w"(result)
15425 : "w"(a), "w"(b)
15426 : /* No clobbers */);
15427 return result;
15430 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15431 vzip1_p16 (poly16x4_t a, poly16x4_t b)
15433 poly16x4_t result;
15434 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15435 : "=w"(result)
15436 : "w"(a), "w"(b)
15437 : /* No clobbers */);
15438 return result;
15441 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15442 vzip1_s8 (int8x8_t a, int8x8_t b)
15444 int8x8_t result;
15445 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15446 : "=w"(result)
15447 : "w"(a), "w"(b)
15448 : /* No clobbers */);
15449 return result;
15452 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15453 vzip1_s16 (int16x4_t a, int16x4_t b)
15455 int16x4_t result;
15456 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15457 : "=w"(result)
15458 : "w"(a), "w"(b)
15459 : /* No clobbers */);
15460 return result;
15463 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15464 vzip1_s32 (int32x2_t a, int32x2_t b)
15466 int32x2_t result;
15467 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15468 : "=w"(result)
15469 : "w"(a), "w"(b)
15470 : /* No clobbers */);
15471 return result;
15474 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15475 vzip1_u8 (uint8x8_t a, uint8x8_t b)
15477 uint8x8_t result;
15478 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15479 : "=w"(result)
15480 : "w"(a), "w"(b)
15481 : /* No clobbers */);
15482 return result;
15485 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15486 vzip1_u16 (uint16x4_t a, uint16x4_t b)
15488 uint16x4_t result;
15489 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15490 : "=w"(result)
15491 : "w"(a), "w"(b)
15492 : /* No clobbers */);
15493 return result;
15496 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15497 vzip1_u32 (uint32x2_t a, uint32x2_t b)
15499 uint32x2_t result;
15500 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15501 : "=w"(result)
15502 : "w"(a), "w"(b)
15503 : /* No clobbers */);
15504 return result;
15507 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15508 vzip1q_f32 (float32x4_t a, float32x4_t b)
15510 float32x4_t result;
15511 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15512 : "=w"(result)
15513 : "w"(a), "w"(b)
15514 : /* No clobbers */);
15515 return result;
15518 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15519 vzip1q_f64 (float64x2_t a, float64x2_t b)
15521 float64x2_t result;
15522 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15523 : "=w"(result)
15524 : "w"(a), "w"(b)
15525 : /* No clobbers */);
15526 return result;
15529 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15530 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
15532 poly8x16_t result;
15533 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15534 : "=w"(result)
15535 : "w"(a), "w"(b)
15536 : /* No clobbers */);
15537 return result;
15540 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15541 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
15543 poly16x8_t result;
15544 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15545 : "=w"(result)
15546 : "w"(a), "w"(b)
15547 : /* No clobbers */);
15548 return result;
15551 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15552 vzip1q_s8 (int8x16_t a, int8x16_t b)
15554 int8x16_t result;
15555 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15556 : "=w"(result)
15557 : "w"(a), "w"(b)
15558 : /* No clobbers */);
15559 return result;
15562 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15563 vzip1q_s16 (int16x8_t a, int16x8_t b)
15565 int16x8_t result;
15566 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15567 : "=w"(result)
15568 : "w"(a), "w"(b)
15569 : /* No clobbers */);
15570 return result;
15573 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15574 vzip1q_s32 (int32x4_t a, int32x4_t b)
15576 int32x4_t result;
15577 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15578 : "=w"(result)
15579 : "w"(a), "w"(b)
15580 : /* No clobbers */);
15581 return result;
15584 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15585 vzip1q_s64 (int64x2_t a, int64x2_t b)
15587 int64x2_t result;
15588 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15589 : "=w"(result)
15590 : "w"(a), "w"(b)
15591 : /* No clobbers */);
15592 return result;
15595 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15596 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
15598 uint8x16_t result;
15599 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15600 : "=w"(result)
15601 : "w"(a), "w"(b)
15602 : /* No clobbers */);
15603 return result;
15606 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15607 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
15609 uint16x8_t result;
15610 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15611 : "=w"(result)
15612 : "w"(a), "w"(b)
15613 : /* No clobbers */);
15614 return result;
15617 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15618 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
15620 uint32x4_t result;
15621 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15622 : "=w"(result)
15623 : "w"(a), "w"(b)
15624 : /* No clobbers */);
15625 return result;
15628 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15629 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
15631 uint64x2_t result;
15632 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15633 : "=w"(result)
15634 : "w"(a), "w"(b)
15635 : /* No clobbers */);
15636 return result;
15639 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15640 vzip2_f32 (float32x2_t a, float32x2_t b)
15642 float32x2_t result;
15643 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15644 : "=w"(result)
15645 : "w"(a), "w"(b)
15646 : /* No clobbers */);
15647 return result;
15650 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15651 vzip2_p8 (poly8x8_t a, poly8x8_t b)
15653 poly8x8_t result;
15654 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15655 : "=w"(result)
15656 : "w"(a), "w"(b)
15657 : /* No clobbers */);
15658 return result;
15661 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15662 vzip2_p16 (poly16x4_t a, poly16x4_t b)
15664 poly16x4_t result;
15665 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15666 : "=w"(result)
15667 : "w"(a), "w"(b)
15668 : /* No clobbers */);
15669 return result;
15672 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15673 vzip2_s8 (int8x8_t a, int8x8_t b)
15675 int8x8_t result;
15676 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15677 : "=w"(result)
15678 : "w"(a), "w"(b)
15679 : /* No clobbers */);
15680 return result;
15683 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15684 vzip2_s16 (int16x4_t a, int16x4_t b)
15686 int16x4_t result;
15687 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15688 : "=w"(result)
15689 : "w"(a), "w"(b)
15690 : /* No clobbers */);
15691 return result;
15694 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15695 vzip2_s32 (int32x2_t a, int32x2_t b)
15697 int32x2_t result;
15698 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15699 : "=w"(result)
15700 : "w"(a), "w"(b)
15701 : /* No clobbers */);
15702 return result;
15705 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15706 vzip2_u8 (uint8x8_t a, uint8x8_t b)
15708 uint8x8_t result;
15709 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15710 : "=w"(result)
15711 : "w"(a), "w"(b)
15712 : /* No clobbers */);
15713 return result;
15716 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15717 vzip2_u16 (uint16x4_t a, uint16x4_t b)
15719 uint16x4_t result;
15720 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15721 : "=w"(result)
15722 : "w"(a), "w"(b)
15723 : /* No clobbers */);
15724 return result;
15727 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15728 vzip2_u32 (uint32x2_t a, uint32x2_t b)
15730 uint32x2_t result;
15731 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15732 : "=w"(result)
15733 : "w"(a), "w"(b)
15734 : /* No clobbers */);
15735 return result;
15738 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15739 vzip2q_f32 (float32x4_t a, float32x4_t b)
15741 float32x4_t result;
15742 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15743 : "=w"(result)
15744 : "w"(a), "w"(b)
15745 : /* No clobbers */);
15746 return result;
15749 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15750 vzip2q_f64 (float64x2_t a, float64x2_t b)
15752 float64x2_t result;
15753 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15754 : "=w"(result)
15755 : "w"(a), "w"(b)
15756 : /* No clobbers */);
15757 return result;
15760 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15761 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
15763 poly8x16_t result;
15764 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15765 : "=w"(result)
15766 : "w"(a), "w"(b)
15767 : /* No clobbers */);
15768 return result;
15771 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15772 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
15774 poly16x8_t result;
15775 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15776 : "=w"(result)
15777 : "w"(a), "w"(b)
15778 : /* No clobbers */);
15779 return result;
15782 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15783 vzip2q_s8 (int8x16_t a, int8x16_t b)
15785 int8x16_t result;
15786 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15787 : "=w"(result)
15788 : "w"(a), "w"(b)
15789 : /* No clobbers */);
15790 return result;
15793 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15794 vzip2q_s16 (int16x8_t a, int16x8_t b)
15796 int16x8_t result;
15797 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15798 : "=w"(result)
15799 : "w"(a), "w"(b)
15800 : /* No clobbers */);
15801 return result;
15804 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15805 vzip2q_s32 (int32x4_t a, int32x4_t b)
15807 int32x4_t result;
15808 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15809 : "=w"(result)
15810 : "w"(a), "w"(b)
15811 : /* No clobbers */);
15812 return result;
15815 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15816 vzip2q_s64 (int64x2_t a, int64x2_t b)
15818 int64x2_t result;
15819 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15820 : "=w"(result)
15821 : "w"(a), "w"(b)
15822 : /* No clobbers */);
15823 return result;
15826 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15827 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
15829 uint8x16_t result;
15830 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15831 : "=w"(result)
15832 : "w"(a), "w"(b)
15833 : /* No clobbers */);
15834 return result;
15837 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15838 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
15840 uint16x8_t result;
15841 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15842 : "=w"(result)
15843 : "w"(a), "w"(b)
15844 : /* No clobbers */);
15845 return result;
15848 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15849 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
15851 uint32x4_t result;
15852 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15853 : "=w"(result)
15854 : "w"(a), "w"(b)
15855 : /* No clobbers */);
15856 return result;
15859 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15860 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
15862 uint64x2_t result;
15863 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15864 : "=w"(result)
15865 : "w"(a), "w"(b)
15866 : /* No clobbers */);
15867 return result;
15870 /* End of temporary inline asm implementations. */
15872 /* Start of temporary inline asm for vldn, vstn and friends. */
15874 /* Create struct element types for duplicating loads.
15876 Create 2 element structures of:
15878 +------+----+----+----+----+
15879 | | 8 | 16 | 32 | 64 |
15880 +------+----+----+----+----+
15881 |int | Y | Y | N | N |
15882 +------+----+----+----+----+
15883 |uint | Y | Y | N | N |
15884 +------+----+----+----+----+
15885 |float | - | - | N | N |
15886 +------+----+----+----+----+
15887 |poly | Y | Y | - | - |
15888 +------+----+----+----+----+
15890 Create 3 element structures of:
15892 +------+----+----+----+----+
15893 | | 8 | 16 | 32 | 64 |
15894 +------+----+----+----+----+
15895 |int | Y | Y | Y | Y |
15896 +------+----+----+----+----+
15897 |uint | Y | Y | Y | Y |
15898 +------+----+----+----+----+
15899 |float | - | - | Y | Y |
15900 +------+----+----+----+----+
15901 |poly | Y | Y | - | - |
15902 +------+----+----+----+----+
15904 Create 4 element structures of:
15906 +------+----+----+----+----+
15907 | | 8 | 16 | 32 | 64 |
15908 +------+----+----+----+----+
15909 |int | Y | N | N | Y |
15910 +------+----+----+----+----+
15911 |uint | Y | N | N | Y |
15912 +------+----+----+----+----+
15913 |float | - | - | N | Y |
15914 +------+----+----+----+----+
15915 |poly | Y | N | - | - |
15916 +------+----+----+----+----+
   These structure types are required for casting the memory references
   used by the load/store inline asm below to operands of the exact size
   being transferred.  */
/* __STRUCTN (t, sz, nelem) defines the struct type t<sz>x<nelem>_t
   wrapping an array `val' of nelem elements of type t<sz>_t.  These
   types only exist so the vldN/vstN asm below can cast element
   pointers to memory operands of the exact size transferred; see the
   table comment above for which combinations are needed.  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  } t ## sz ## x ## nelem ## _t;

/* 2-element structs.  (32/64-bit combinations reuse the existing
   vector types and so are not generated here.)  */
__STRUCTN (int, 8, 2)
__STRUCTN (int, 16, 2)
__STRUCTN (uint, 8, 2)
__STRUCTN (uint, 16, 2)
__STRUCTN (poly, 8, 2)
__STRUCTN (poly, 16, 2)
/* 3-element structs. */
__STRUCTN (int, 8, 3)
__STRUCTN (int, 16, 3)
__STRUCTN (int, 32, 3)
__STRUCTN (int, 64, 3)
__STRUCTN (uint, 8, 3)
__STRUCTN (uint, 16, 3)
__STRUCTN (uint, 32, 3)
__STRUCTN (uint, 64, 3)
__STRUCTN (float, 32, 3)
__STRUCTN (float, 64, 3)
__STRUCTN (poly, 8, 3)
__STRUCTN (poly, 16, 3)
/* 4-element structs. */
__STRUCTN (int, 8, 4)
__STRUCTN (int, 64, 4)
__STRUCTN (uint, 8, 4)
__STRUCTN (uint, 64, 4)
__STRUCTN (poly, 8, 4)
__STRUCTN (float, 64, 4)
#undef __STRUCTN
/* __LD2R_FUNC generates vld2[q]_dup_<funcsuffix>: LD2R broadcast-loads
   a pair of elements from *ptr, replicating each across one of two
   vectors, then ST1 stores both vectors into the result struct.
   The fixed registers v16/v17 are used (and listed as clobbers)
   because both operands are memory ("Q" constraints), so the asm
   cannot name register operands for the ld2r itself.
   `structtype' is the in-memory type of the two loaded elements, used
   only to tell GCC how much memory the "Q" input reads.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,		\
		    regsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype				\
  __attribute__ ((__always_inline__))				\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)		\
  {								\
    rettype result;						\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
	     : "=Q"(result)					\
	     : "Q"(*(const structtype *)ptr)			\
	     : "memory", "v16", "v17");				\
    return result;						\
  }

__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
15993 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
15994 lnsuffix, funcsuffix, Q) \
15995 __extension__ static __inline rettype \
15996 __attribute__ ((__always_inline__)) \
15997 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
15998 rettype b, const int c) \
16000 rettype result; \
16001 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
16002 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
16003 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
16004 : "=Q"(result) \
16005 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
16006 : "memory", "v16", "v17"); \
16007 return result; \
16010 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
16011 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
16012 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
16013 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
16014 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
16015 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
16016 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
16017 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
16018 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
16019 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
16020 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
16021 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
16022 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
16023 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
16024 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
16025 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
16026 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
16027 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
16028 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
16029 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
16030 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
16031 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
16032 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
16033 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vld3<q>_dup_<fs>: load three <fs> elements from PTR and
   replicate each across every lane of the corresponding result
   vector (AdvSIMD LD3R), returning the triple as RT.  Implemented
   with inline asm; v16-v18 are scratch and clobbered.  */
#define __LD3R_FUNC(rt, st, pt, rs, fs, Q) \
__extension__ static __inline rt \
__attribute__ ((__always_inline__)) \
vld3 ## Q ## _dup_ ## fs (const pt *ptr) \
{ \
  rt __rv; \
  __asm__ ("ld3r {v16." #rs " - v18." #rs "}, %1\n\t" \
	   "st1 {v16." #rs " - v18." #rs "}, %0\n\t" \
	   : "=Q"(__rv) \
	   : "Q"(*(const st *)ptr) \
	   : "memory", "v16", "v17", "v18"); \
  return __rv; \
}
/* vld3<q>_dup_<suffix> instantiations for every element type
   (a "q" suffix means 128-bit result vectors).  */
__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* Define vld3<q>_lane_<fs>: starting from B, reload lane C of each
   of the three vectors from three consecutive <fs> elements at PTR
   (AdvSIMD LD3 to one lane) and return the updated triple.
   Implemented with inline asm; v16-v18 are scratch and clobbered.  */
#define __LD3_LANE_FUNC(rt, pt, rs, ls, fs, Q) \
__extension__ static __inline rt \
__attribute__ ((__always_inline__)) \
vld3 ## Q ## _lane_ ## fs (const pt *ptr, \
			   rt b, const int c) \
{ \
  rt __rv; \
  __asm__ ("ld1 {v16." #rs " - v18." #rs "}, %1\n\t" \
	   "ld3 {v16." #ls " - v18." #ls "}[%3], %2\n\t" \
	   "st1 {v16." #rs " - v18." #rs "}, %0\n\t" \
	   : "=Q"(__rv) \
	   : "Q"(b), "Q"(*(const rt *)ptr), "i"(c) \
	   : "memory", "v16", "v17", "v18"); \
  return __rv; \
}
16092 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
16093 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
16094 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
16095 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
16096 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
16097 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
16098 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
16099 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
16100 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
16101 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
16102 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
16103 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
16104 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
16105 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
16106 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
16107 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
16108 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
16109 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
16110 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
16111 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
16112 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
16113 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
16114 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
16115 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vld4<q>_dup_<fs>: load four <fs> elements from PTR and
   replicate each across every lane of the corresponding result
   vector (AdvSIMD LD4R), returning the quadruple as RT.  Implemented
   with inline asm; v16-v19 are scratch and clobbered.  */
#define __LD4R_FUNC(rt, st, pt, rs, fs, Q) \
__extension__ static __inline rt \
__attribute__ ((__always_inline__)) \
vld4 ## Q ## _dup_ ## fs (const pt *ptr) \
{ \
  rt __rv; \
  __asm__ ("ld4r {v16." #rs " - v19." #rs "}, %1\n\t" \
	   "st1 {v16." #rs " - v19." #rs "}, %0\n\t" \
	   : "=Q"(__rv) \
	   : "Q"(*(const st *)ptr) \
	   : "memory", "v16", "v17", "v18", "v19"); \
  return __rv; \
}
/* vld4<q>_dup_<suffix> instantiations for every element type
   (a "q" suffix means 128-bit result vectors).  */
__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Define vld4<q>_lane_<fs>: starting from B, reload lane C of each
   of the four vectors from four consecutive <fs> elements at PTR
   (AdvSIMD LD4 to one lane) and return the updated quadruple.
   Implemented with inline asm; v16-v19 are scratch and clobbered.  */
#define __LD4_LANE_FUNC(rt, pt, rs, ls, fs, Q) \
__extension__ static __inline rt \
__attribute__ ((__always_inline__)) \
vld4 ## Q ## _lane_ ## fs (const pt *ptr, \
			   rt b, const int c) \
{ \
  rt __rv; \
  __asm__ ("ld1 {v16." #rs " - v19." #rs "}, %1\n\t" \
	   "ld4 {v16." #ls " - v19." #ls "}[%3], %2\n\t" \
	   "st1 {v16." #rs " - v19." #rs "}, %0\n\t" \
	   : "=Q"(__rv) \
	   : "Q"(b), "Q"(*(const rt *)ptr), "i"(c) \
	   : "memory", "v16", "v17", "v18", "v19"); \
  return __rv; \
}
16174 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
16175 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
16176 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
16177 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
16178 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
16179 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
16180 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
16181 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
16182 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
16183 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
16184 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
16185 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
16186 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
16187 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
16188 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
16189 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
16190 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
16191 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
16192 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
16193 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
16194 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
16195 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
16196 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
16197 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Define vst2<q>_lane_<funcsuffix>: store lane C of each of the two
   vectors in B to two consecutive elements at PTR (AdvSIMD ST2 from
   one lane).  The pointer parameter is non-const: this intrinsic
   writes through it.  (The previous const-qualified signature cast
   the const away in the output operand, which misled callers and
   risked undefined behavior on genuinely read-only objects; ACLE
   specifies a non-const pointer for the vst2 lane intrinsics.)
   v16-v17 are scratch and clobbered.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
__extension__ static __inline void \
__attribute__ ((__always_inline__)) \
vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				   intype b, const int c) \
{ \
  __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	   "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
	   : "=Q"(*(intype *) ptr) \
	   : "Q"(b), "i"(c) \
	   : "memory", "v16", "v17"); \
}
/* vst2<q>_lane_<suffix> instantiations (a "q" suffix means 128-bit
   input vectors).  */
__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vst3<q>_lane_<funcsuffix>: store lane C of each of the
   three vectors in B to three consecutive elements at PTR (AdvSIMD
   ST3 from one lane).  The pointer parameter is non-const: this
   intrinsic writes through it.  (The previous const-qualified
   signature cast the const away in the output operand, which misled
   callers and risked undefined behavior on genuinely read-only
   objects; ACLE specifies a non-const pointer here.)  v16-v18 are
   scratch and clobbered.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
__extension__ static __inline void \
__attribute__ ((__always_inline__)) \
vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				   intype b, const int c) \
{ \
  __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	   "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
	   : "=Q"(*(intype *) ptr) \
	   : "Q"(b), "i"(c) \
	   : "memory", "v16", "v17", "v18"); \
}
/* vst3<q>_lane_<suffix> instantiations (a "q" suffix means 128-bit
   input vectors).  */
__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vst4<q>_lane_<funcsuffix>: store lane C of each of the four
   vectors in B to four consecutive elements at PTR (AdvSIMD ST4 from
   one lane).  The pointer parameter is non-const: this intrinsic
   writes through it.  (The previous const-qualified signature cast
   the const away in the output operand, which misled callers and
   risked undefined behavior on genuinely read-only objects; ACLE
   specifies a non-const pointer here.)  v16-v19 are scratch and
   clobbered.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
__extension__ static __inline void \
__attribute__ ((__always_inline__)) \
vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
				   intype b, const int c) \
{ \
  __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	   "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
	   : "=Q"(*(intype *) ptr) \
	   : "Q"(b), "i"(c) \
	   : "memory", "v16", "v17", "v18", "v19"); \
}
/* vst4<q>_lane_<suffix> instantiations (a "q" suffix means 128-bit
   input vectors).  */
__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
16316 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16317 vaddlv_s32 (int32x2_t a)
16319 int64_t result;
16320 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
16321 return result;
16324 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16325 vaddlv_u32 (uint32x2_t a)
16327 uint64_t result;
16328 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
16329 return result;
16332 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16333 vpaddd_s64 (int64x2_t __a)
16335 return __builtin_aarch64_addpdi (__a);
16338 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16339 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
16341 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
16344 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16345 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
16347 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
16350 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16351 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
16353 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
16356 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16357 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
16359 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
16362 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16363 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
16365 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
16368 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16369 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
16371 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
16374 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16375 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
16377 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
16380 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16381 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
16383 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
16386 /* Table intrinsics. */
16388 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16389 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
16391 poly8x8_t result;
16392 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16393 : "=w"(result)
16394 : "w"(a), "w"(b)
16395 : /* No clobbers */);
16396 return result;
16399 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16400 vqtbl1_s8 (int8x16_t a, int8x8_t b)
16402 int8x8_t result;
16403 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16404 : "=w"(result)
16405 : "w"(a), "w"(b)
16406 : /* No clobbers */);
16407 return result;
16410 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16411 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
16413 uint8x8_t result;
16414 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16415 : "=w"(result)
16416 : "w"(a), "w"(b)
16417 : /* No clobbers */);
16418 return result;
16421 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16422 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
16424 poly8x16_t result;
16425 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16426 : "=w"(result)
16427 : "w"(a), "w"(b)
16428 : /* No clobbers */);
16429 return result;
16432 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16433 vqtbl1q_s8 (int8x16_t a, int8x16_t b)
16435 int8x16_t result;
16436 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16437 : "=w"(result)
16438 : "w"(a), "w"(b)
16439 : /* No clobbers */);
16440 return result;
16443 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16444 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
16446 uint8x16_t result;
16447 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16448 : "=w"(result)
16449 : "w"(a), "w"(b)
16450 : /* No clobbers */);
16451 return result;
16454 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16455 vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx)
16457 int8x8_t result;
16458 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16459 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16460 :"=w"(result)
16461 :"Q"(tab),"w"(idx)
16462 :"memory", "v16", "v17");
16463 return result;
16466 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16467 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
16469 uint8x8_t result;
16470 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16471 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16472 :"=w"(result)
16473 :"Q"(tab),"w"(idx)
16474 :"memory", "v16", "v17");
16475 return result;
16478 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16479 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
16481 poly8x8_t result;
16482 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16483 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16484 :"=w"(result)
16485 :"Q"(tab),"w"(idx)
16486 :"memory", "v16", "v17");
16487 return result;
16490 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16491 vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx)
16493 int8x16_t result;
16494 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16495 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16496 :"=w"(result)
16497 :"Q"(tab),"w"(idx)
16498 :"memory", "v16", "v17");
16499 return result;
16502 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16503 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
16505 uint8x16_t result;
16506 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16507 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16508 :"=w"(result)
16509 :"Q"(tab),"w"(idx)
16510 :"memory", "v16", "v17");
16511 return result;
16514 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16515 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
16517 poly8x16_t result;
16518 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16519 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16520 :"=w"(result)
16521 :"Q"(tab),"w"(idx)
16522 :"memory", "v16", "v17");
16523 return result;
16526 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16527 vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx)
16529 int8x8_t result;
16530 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16531 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16532 :"=w"(result)
16533 :"Q"(tab),"w"(idx)
16534 :"memory", "v16", "v17", "v18");
16535 return result;
16538 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16539 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
16541 uint8x8_t result;
16542 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16543 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16544 :"=w"(result)
16545 :"Q"(tab),"w"(idx)
16546 :"memory", "v16", "v17", "v18");
16547 return result;
16550 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16551 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
16553 poly8x8_t result;
16554 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16555 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16556 :"=w"(result)
16557 :"Q"(tab),"w"(idx)
16558 :"memory", "v16", "v17", "v18");
16559 return result;
16562 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16563 vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx)
16565 int8x16_t result;
16566 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16567 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16568 :"=w"(result)
16569 :"Q"(tab),"w"(idx)
16570 :"memory", "v16", "v17", "v18");
16571 return result;
16574 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16575 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
16577 uint8x16_t result;
16578 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16579 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16580 :"=w"(result)
16581 :"Q"(tab),"w"(idx)
16582 :"memory", "v16", "v17", "v18");
16583 return result;
16586 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16587 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
16589 poly8x16_t result;
16590 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16591 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16592 :"=w"(result)
16593 :"Q"(tab),"w"(idx)
16594 :"memory", "v16", "v17", "v18");
16595 return result;
16598 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16599 vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx)
16601 int8x8_t result;
16602 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16603 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16604 :"=w"(result)
16605 :"Q"(tab),"w"(idx)
16606 :"memory", "v16", "v17", "v18", "v19");
16607 return result;
16610 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16611 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
16613 uint8x8_t result;
16614 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16615 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16616 :"=w"(result)
16617 :"Q"(tab),"w"(idx)
16618 :"memory", "v16", "v17", "v18", "v19");
16619 return result;
16622 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16623 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
16625 poly8x8_t result;
16626 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16627 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16628 :"=w"(result)
16629 :"Q"(tab),"w"(idx)
16630 :"memory", "v16", "v17", "v18", "v19");
16631 return result;
16635 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16636 vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx)
16638 int8x16_t result;
16639 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16640 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16641 :"=w"(result)
16642 :"Q"(tab),"w"(idx)
16643 :"memory", "v16", "v17", "v18", "v19");
16644 return result;
16647 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16648 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
16650 uint8x16_t result;
16651 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16652 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16653 :"=w"(result)
16654 :"Q"(tab),"w"(idx)
16655 :"memory", "v16", "v17", "v18", "v19");
16656 return result;
16659 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16660 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
16662 poly8x16_t result;
16663 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16664 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16665 :"=w"(result)
16666 :"Q"(tab),"w"(idx)
16667 :"memory", "v16", "v17", "v18", "v19");
16668 return result;
16672 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16673 vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx)
16675 int8x8_t result = r;
16676 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16677 : "+w"(result)
16678 : "w"(tab), "w"(idx)
16679 : /* No clobbers */);
16680 return result;
16683 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16684 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
16686 uint8x8_t result = r;
16687 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16688 : "+w"(result)
16689 : "w"(tab), "w"(idx)
16690 : /* No clobbers */);
16691 return result;
16694 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16695 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
16697 poly8x8_t result = r;
16698 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16699 : "+w"(result)
16700 : "w"(tab), "w"(idx)
16701 : /* No clobbers */);
16702 return result;
16705 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16706 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx)
16708 int8x16_t result = r;
16709 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16710 : "+w"(result)
16711 : "w"(tab), "w"(idx)
16712 : /* No clobbers */);
16713 return result;
16716 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16717 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
16719 uint8x16_t result = r;
16720 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16721 : "+w"(result)
16722 : "w"(tab), "w"(idx)
16723 : /* No clobbers */);
16724 return result;
16727 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16728 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
16730 poly8x16_t result = r;
16731 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16732 : "+w"(result)
16733 : "w"(tab), "w"(idx)
16734 : /* No clobbers */);
16735 return result;
16738 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16739 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx)
16741 int8x8_t result = r;
16742 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16743 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16744 :"+w"(result)
16745 :"Q"(tab),"w"(idx)
16746 :"memory", "v16", "v17");
16747 return result;
16750 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16751 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
16753 uint8x8_t result = r;
16754 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16755 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16756 :"+w"(result)
16757 :"Q"(tab),"w"(idx)
16758 :"memory", "v16", "v17");
16759 return result;
16762 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16763 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
16765 poly8x8_t result = r;
16766 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16767 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16768 :"+w"(result)
16769 :"Q"(tab),"w"(idx)
16770 :"memory", "v16", "v17");
16771 return result;
16775 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16776 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx)
16778 int8x16_t result = r;
16779 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16780 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16781 :"+w"(result)
16782 :"Q"(tab),"w"(idx)
16783 :"memory", "v16", "v17");
16784 return result;
16787 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16788 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
16790 uint8x16_t result = r;
16791 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16792 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16793 :"+w"(result)
16794 :"Q"(tab),"w"(idx)
16795 :"memory", "v16", "v17");
16796 return result;
16799 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16800 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
16802 poly8x16_t result = r;
16803 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16804 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16805 :"+w"(result)
16806 :"Q"(tab),"w"(idx)
16807 :"memory", "v16", "v17");
16808 return result;
16812 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16813 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx)
16815 int8x8_t result = r;
16816 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16817 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16818 :"+w"(result)
16819 :"Q"(tab),"w"(idx)
16820 :"memory", "v16", "v17", "v18");
16821 return result;
16824 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16825 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
16827 uint8x8_t result = r;
16828 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16829 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16830 :"+w"(result)
16831 :"Q"(tab),"w"(idx)
16832 :"memory", "v16", "v17", "v18");
16833 return result;
16836 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16837 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
16839 poly8x8_t result = r;
16840 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16841 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16842 :"+w"(result)
16843 :"Q"(tab),"w"(idx)
16844 :"memory", "v16", "v17", "v18");
16845 return result;
16849 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16850 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx)
16852 int8x16_t result = r;
16853 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16854 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16855 :"+w"(result)
16856 :"Q"(tab),"w"(idx)
16857 :"memory", "v16", "v17", "v18");
16858 return result;
16861 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16862 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
16864 uint8x16_t result = r;
16865 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16866 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16867 :"+w"(result)
16868 :"Q"(tab),"w"(idx)
16869 :"memory", "v16", "v17", "v18");
16870 return result;
16873 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16874 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
16876 poly8x16_t result = r;
16877 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16878 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16879 :"+w"(result)
16880 :"Q"(tab),"w"(idx)
16881 :"memory", "v16", "v17", "v18");
16882 return result;
16886 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16887 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx)
16889 int8x8_t result = r;
16890 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16891 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16892 :"+w"(result)
16893 :"Q"(tab),"w"(idx)
16894 :"memory", "v16", "v17", "v18", "v19");
16895 return result;
16898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16899 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
16901 uint8x8_t result = r;
16902 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16903 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16904 :"+w"(result)
16905 :"Q"(tab),"w"(idx)
16906 :"memory", "v16", "v17", "v18", "v19");
16907 return result;
16910 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16911 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
16913 poly8x8_t result = r;
16914 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16915 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16916 :"+w"(result)
16917 :"Q"(tab),"w"(idx)
16918 :"memory", "v16", "v17", "v18", "v19");
16919 return result;
/* 128-bit form of vqtbx4: TBX of 16 byte-indices into a four-vector,
   64-byte table; out-of-range index lanes keep the matching lane of R.
   The table is loaded into the fixed register block v16-v19.  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

/* Unsigned variant of vqtbx4q_s8.  */
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

/* Poly8 variant of vqtbx4q_s8.  */
__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}
16959 /* V7 legacy table intrinsics. */
/* v7-style vtbl1: look up 8 byte-indices in a single 64-bit table.
   AArch64 TBL operates on 128-bit tables, so the table is zero-padded
   to 128 bits; any index >= 8 then reads 0, matching v7 vtbl
   semantics.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Unsigned variant of vtbl1_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Poly8 variant of vtbl1_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* v7-style vtbl2: the two 64-bit table halves are combined into one
   128-bit table for AArch64 TBL; indices >= 16 read 0, matching v7
   out-of-range behaviour.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Unsigned variant of vtbl2_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Poly8 variant of vtbl2_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* v7-style vtbl3: three 64-bit tables are repacked into two 128-bit
   vectors, the last half zero-padded, and looked up with a two-vector
   TBL.  Indices 24-31 hit the zero pad and indices >= 32 are handled
   by TBL itself, so all out-of-range lanes read 0 as v7 requires.
   The repacked table goes through memory into the fixed register pair
   v16-v17 (hence the "Q" input and register clobbers).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Unsigned variant of vtbl3_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Poly8 variant of vtbl3_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
/* v7-style vtbl4: four 64-bit tables repacked into two 128-bit vectors
   and looked up with a two-vector TBL.  Indices >= 32 read 0 via TBL,
   matching v7 out-of-range behaviour.  The repacked table goes through
   memory into the fixed register pair v16-v17.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Unsigned variant of vtbl4_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Poly8 variant of vtbl4_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
/* v7-style vtbx1: as vtbl1, but index lanes >= 8 must select the
   matching lane of R instead of 0.  TBX on the zero-padded 128-bit
   table would only preserve R for indices >= 16, so the selection is
   done by hand: movi/cmhs builds a (idx >= 8) mask and BSL merges R
   over the TBL result.  The "+w" on RESULT keeps the mask scratch
   register live across the asm (movi writes it before any read).  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x8_t tmp1;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}

/* Unsigned variant of vtbx1_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x8_t tmp1;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}

/* Poly8 variant of vtbx1_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x8_t tmp1;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}
/* v7-style vtbx2: the two 64-bit halves form one 16-byte table, so a
   single TBX gives exactly the v7 semantics -- indices >= 16 leave the
   matching lane of R unchanged.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Unsigned variant of vtbx2_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

/* Poly8 variant of vtbx2_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* v7-style vtbx3: three 64-bit tables repacked into two 128-bit
   vectors (last half zero-padded).  v7 requires indices >= 24 to
   select R, but TBX on the padded table would return 0 for indices
   24-31, so the merge is done manually: movi/cmhs builds a
   (idx >= 24) mask and BSL selects R where it is set.  The repacked
   table goes through memory into the fixed register pair v16-v17.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x8_t tmp1;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}

/* Unsigned variant of vtbx3_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x8_t tmp1;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}

/* Poly8 variant of vtbx3_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x8_t tmp1;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}
/* v7-style vtbx4: four 64-bit tables repacked into two 128-bit
   vectors give a full 32-byte table, so a two-vector TBX matches the
   v7 semantics directly -- indices >= 32 leave the matching lane of R
   unchanged.  The table goes through memory into v16-v17.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Unsigned variant of vtbx4_s8.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

/* Poly8 variant of vtbx4_s8.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
17309 /* End of temporary inline asm. */
17311 /* Start of optimal implementations in approved order. */
17313 /* vabs */
/* Lane-wise absolute value.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

/* float64x1_t is a plain double here, so the scalar fabs builtin
   suffices.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return __builtin_fabs (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

/* int64x1_t is a plain int64_t, so use the scalar builtin.
   NOTE(review): assumes int64_t == long long on the target --
   confirm.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  return __builtin_llabs (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}
17387 /* vadd */
/* Scalar 64-bit add; the 64x1 "vector" types are plain integers
   here, so ordinary C addition implements the intrinsic.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vaddd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}
17401 /* vaddv */
17403 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17404 vaddv_s8 (int8x8_t __a)
17406 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
17409 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17410 vaddv_s16 (int16x4_t __a)
17412 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
17415 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17416 vaddv_s32 (int32x2_t __a)
17418 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
17421 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17422 vaddv_u8 (uint8x8_t __a)
17424 return vget_lane_u8 ((uint8x8_t)
17425 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
17428 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17429 vaddv_u16 (uint16x4_t __a)
17431 return vget_lane_u16 ((uint16x4_t)
17432 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
17435 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17436 vaddv_u32 (uint32x2_t __a)
17438 return vget_lane_u32 ((uint32x2_t)
17439 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
17442 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17443 vaddvq_s8 (int8x16_t __a)
17445 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
17448 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17449 vaddvq_s16 (int16x8_t __a)
17451 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
17454 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17455 vaddvq_s32 (int32x4_t __a)
17457 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
17460 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17461 vaddvq_s64 (int64x2_t __a)
17463 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
17466 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17467 vaddvq_u8 (uint8x16_t __a)
17469 return vgetq_lane_u8 ((uint8x16_t)
17470 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
17473 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17474 vaddvq_u16 (uint16x8_t __a)
17476 return vgetq_lane_u16 ((uint16x8_t)
17477 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
17480 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17481 vaddvq_u32 (uint32x4_t __a)
17483 return vgetq_lane_u32 ((uint32x4_t)
17484 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
17487 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17488 vaddvq_u64 (uint64x2_t __a)
17490 return vgetq_lane_u64 ((uint64x2_t)
17491 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
17494 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17495 vaddv_f32 (float32x2_t __a)
17497 float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
17498 return vget_lane_f32 (t, 0);
17501 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17502 vaddvq_f32 (float32x4_t __a)
17504 float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
17505 return vgetq_lane_f32 (t, 0);
17508 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17509 vaddvq_f64 (float64x2_t __a)
17511 float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
17512 return vgetq_lane_f64 (t, 0);
17515 /* vcage */
17517 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17518 vcages_f32 (float32_t __a, float32_t __b)
17520 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
17523 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17524 vcage_f32 (float32x2_t __a, float32x2_t __b)
17526 return vabs_f32 (__a) >= vabs_f32 (__b);
17529 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17530 vcageq_f32 (float32x4_t __a, float32x4_t __b)
17532 return vabsq_f32 (__a) >= vabsq_f32 (__b);
17535 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17536 vcaged_f64 (float64_t __a, float64_t __b)
17538 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
17541 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17542 vcageq_f64 (float64x2_t __a, float64x2_t __b)
17544 return vabsq_f64 (__a) >= vabsq_f64 (__b);
17547 /* vcagt */
17549 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17550 vcagts_f32 (float32_t __a, float32_t __b)
17552 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
17555 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17556 vcagt_f32 (float32x2_t __a, float32x2_t __b)
17558 return vabs_f32 (__a) > vabs_f32 (__b);
17561 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17562 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
17564 return vabsq_f32 (__a) > vabsq_f32 (__b);
17567 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17568 vcagtd_f64 (float64_t __a, float64_t __b)
17570 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
17573 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17574 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
17576 return vabsq_f64 (__a) > vabsq_f64 (__b);
17579 /* vcale */
17581 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17582 vcale_f32 (float32x2_t __a, float32x2_t __b)
17584 return vabs_f32 (__a) <= vabs_f32 (__b);
17587 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17588 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
17590 return vabsq_f32 (__a) <= vabsq_f32 (__b);
17593 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17594 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
17596 return vabsq_f64 (__a) <= vabsq_f64 (__b);
17599 /* vcalt */
17601 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17602 vcalt_f32 (float32x2_t __a, float32x2_t __b)
17604 return vabs_f32 (__a) < vabs_f32 (__b);
17607 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17608 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
17610 return vabsq_f32 (__a) < vabsq_f32 (__b);
17613 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17614 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
17616 return vabsq_f64 (__a) < vabsq_f64 (__b);
17619 /* vceq - vector. */
17621 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17622 vceq_f32 (float32x2_t __a, float32x2_t __b)
17624 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
17627 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17628 vceq_f64 (float64x1_t __a, float64x1_t __b)
17630 return __a == __b ? -1ll : 0ll;
17633 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17634 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
17636 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17637 (int8x8_t) __b);
17640 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17641 vceq_s8 (int8x8_t __a, int8x8_t __b)
17643 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
17646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17647 vceq_s16 (int16x4_t __a, int16x4_t __b)
17649 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
17652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17653 vceq_s32 (int32x2_t __a, int32x2_t __b)
17655 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
17658 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17659 vceq_s64 (int64x1_t __a, int64x1_t __b)
17661 return __a == __b ? -1ll : 0ll;
17664 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17665 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
17667 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17668 (int8x8_t) __b);
17671 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17672 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
17674 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
17675 (int16x4_t) __b);
17678 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17679 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
17681 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
17682 (int32x2_t) __b);
17685 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17686 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
17688 return __a == __b ? -1ll : 0ll;
17691 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17692 vceqq_f32 (float32x4_t __a, float32x4_t __b)
17694 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
17697 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17698 vceqq_f64 (float64x2_t __a, float64x2_t __b)
17700 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
17703 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17704 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
17706 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17707 (int8x16_t) __b);
17710 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17711 vceqq_s8 (int8x16_t __a, int8x16_t __b)
17713 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
17716 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17717 vceqq_s16 (int16x8_t __a, int16x8_t __b)
17719 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
17722 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17723 vceqq_s32 (int32x4_t __a, int32x4_t __b)
17725 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
17728 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17729 vceqq_s64 (int64x2_t __a, int64x2_t __b)
17731 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
17734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17735 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
17737 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17738 (int8x16_t) __b);
17741 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17742 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
17744 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
17745 (int16x8_t) __b);
17748 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17749 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
17751 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
17752 (int32x4_t) __b);
17755 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17756 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
17758 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
17759 (int64x2_t) __b);
17762 /* vceq - scalar. */
17764 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17765 vceqs_f32 (float32_t __a, float32_t __b)
17767 return __a == __b ? -1 : 0;
17770 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17771 vceqd_s64 (int64x1_t __a, int64x1_t __b)
17773 return __a == __b ? -1ll : 0ll;
17776 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17777 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
17779 return __a == __b ? -1ll : 0ll;
17782 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17783 vceqd_f64 (float64_t __a, float64_t __b)
17785 return __a == __b ? -1ll : 0ll;
17788 /* vceqz - vector. */
17790 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17791 vceqz_f32 (float32x2_t __a)
17793 float32x2_t __b = {0.0f, 0.0f};
17794 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
17797 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17798 vceqz_f64 (float64x1_t __a)
17800 return __a == 0.0 ? -1ll : 0ll;
17803 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17804 vceqz_p8 (poly8x8_t __a)
17806 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17807 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17808 (int8x8_t) __b);
17811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17812 vceqz_s8 (int8x8_t __a)
17814 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17815 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
17818 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17819 vceqz_s16 (int16x4_t __a)
17821 int16x4_t __b = {0, 0, 0, 0};
17822 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
17825 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17826 vceqz_s32 (int32x2_t __a)
17828 int32x2_t __b = {0, 0};
17829 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
17832 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17833 vceqz_s64 (int64x1_t __a)
17835 return __a == 0ll ? -1ll : 0ll;
17838 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17839 vceqz_u8 (uint8x8_t __a)
17841 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17842 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
17843 (int8x8_t) __b);
17846 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17847 vceqz_u16 (uint16x4_t __a)
17849 uint16x4_t __b = {0, 0, 0, 0};
17850 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
17851 (int16x4_t) __b);
17854 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17855 vceqz_u32 (uint32x2_t __a)
17857 uint32x2_t __b = {0, 0};
17858 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
17859 (int32x2_t) __b);
17862 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17863 vceqz_u64 (uint64x1_t __a)
17865 return __a == 0ll ? -1ll : 0ll;
17868 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17869 vceqzq_f32 (float32x4_t __a)
17871 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
17872 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
17875 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17876 vceqzq_f64 (float64x2_t __a)
17878 float64x2_t __b = {0.0, 0.0};
17879 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
17882 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17883 vceqzq_p8 (poly8x16_t __a)
17885 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17886 0, 0, 0, 0, 0, 0, 0, 0};
17887 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17888 (int8x16_t) __b);
17891 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17892 vceqzq_s8 (int8x16_t __a)
17894 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17895 0, 0, 0, 0, 0, 0, 0, 0};
17896 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
17899 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17900 vceqzq_s16 (int16x8_t __a)
17902 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17903 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
17906 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17907 vceqzq_s32 (int32x4_t __a)
17909 int32x4_t __b = {0, 0, 0, 0};
17910 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
17913 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17914 vceqzq_s64 (int64x2_t __a)
17916 int64x2_t __b = {0, 0};
17917 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
17920 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17921 vceqzq_u8 (uint8x16_t __a)
17923 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
17924 0, 0, 0, 0, 0, 0, 0, 0};
17925 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
17926 (int8x16_t) __b);
17929 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17930 vceqzq_u16 (uint16x8_t __a)
17932 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
17933 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
17934 (int16x8_t) __b);
17937 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17938 vceqzq_u32 (uint32x4_t __a)
17940 uint32x4_t __b = {0, 0, 0, 0};
17941 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
17942 (int32x4_t) __b);
17945 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17946 vceqzq_u64 (uint64x2_t __a)
17948 uint64x2_t __b = {0, 0};
17949 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
17950 (int64x2_t) __b);
17953 /* vceqz - scalar. */
17955 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17956 vceqzs_f32 (float32_t __a)
17958 return __a == 0.0f ? -1 : 0;
17961 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17962 vceqzd_s64 (int64x1_t __a)
17964 return __a == 0 ? -1ll : 0ll;
17967 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17968 vceqzd_u64 (int64x1_t __a)
17970 return __a == 0 ? -1ll : 0ll;
17973 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17974 vceqzd_f64 (float64_t __a)
17976 return __a == 0.0 ? -1ll : 0ll;
/* vcge - vector.
   Per-lane __a >= __b: each result lane is all-ones when the compare is
   true, all-zeros otherwise.  Signed/float forms use the cmge* builtins
   and unsigned forms the cmgeu* builtins; poly8 reuses the signed byte
   builtin (NOTE(review): an ordered compare of polynomial values is
   implemented as a signed byte compare — confirm intended semantics).
   Single-lane 64-bit types are scalar typedefs, so those forms are
   plain C conditionals.  Vector casts only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a >= __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}
18122 /* vcge - scalar. */
18124 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18125 vcges_f32 (float32_t __a, float32_t __b)
18127 return __a >= __b ? -1 : 0;
18130 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18131 vcged_s64 (int64x1_t __a, int64x1_t __b)
18133 return __a >= __b ? -1ll : 0ll;
18136 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18137 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
18139 return __a >= __b ? -1ll : 0ll;
18142 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18143 vcged_f64 (float64_t __a, float64_t __b)
18145 return __a >= __b ? -1ll : 0ll;
/* vcgez - vector.
   Per-lane __a >= 0: each result lane is all-ones when true, all-zeros
   otherwise.  Implemented by comparing against an explicit zero vector
   with the same builtins as vcge.  NOTE(review): the unsigned forms
   compare an unsigned value against zero with >=, which is always true,
   so every result lane is all-ones; kept as-is to match the existing
   header, but the intrinsic is of questionable value — confirm against
   ACLE.  Vector casts only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_f64 (float64x1_t __a)
{
  return __a >= 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_s64 (int64x1_t __a)
{
  return __a >= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgez_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgez_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgez_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgez_u64 (uint64x1_t __a)
{
  /* Unsigned >= 0 is always true; yields -1ll for every input.  */
  return __a >= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgezq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgezq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgezq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgezq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}
18313 /* vcgez - scalar. */
18315 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18316 vcgezs_f32 (float32_t __a)
18318 return __a >= 0.0f ? -1 : 0;
18321 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18322 vcgezd_s64 (int64x1_t __a)
18324 return __a >= 0 ? -1ll : 0ll;
18327 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18328 vcgezd_u64 (int64x1_t __a)
18330 return __a >= 0 ? -1ll : 0ll;
18333 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18334 vcgezd_f64 (float64_t __a)
18336 return __a >= 0.0 ? -1ll : 0ll;
/* vcgt - vector.
   Per-lane __a > __b: each result lane is all-ones when the compare is
   true, all-zeros otherwise.  Signed/float forms use the cmgt*
   builtins, unsigned forms the cmgtu* builtins; poly8 reuses the
   signed byte builtin.  Single-lane 64-bit types are scalar typedefs,
   so those forms are plain C conditionals.  Vector casts only
   reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a > __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}
18482 /* vcgt - scalar. */
18484 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18485 vcgts_f32 (float32_t __a, float32_t __b)
18487 return __a > __b ? -1 : 0;
18490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18491 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
18493 return __a > __b ? -1ll : 0ll;
18496 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18497 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
18499 return __a > __b ? -1ll : 0ll;
18502 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18503 vcgtd_f64 (float64_t __a, float64_t __b)
18505 return __a > __b ? -1ll : 0ll;
/* vcgtz - vector.
   Per-lane __a > 0: each result lane is all-ones when true, all-zeros
   otherwise.  Implemented by comparing against an explicit zero vector
   with the same builtins as vcgt.  For the unsigned forms, > 0 is
   equivalent to != 0.  Vector casts only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_f64 (float64x1_t __a)
{
  return __a > 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_s64 (int64x1_t __a)
{
  return __a > 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgtz_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgtz_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgtz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtz_u64 (uint64x1_t __a)
{
  return __a > 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtzq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtzq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtzq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtzq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}
18673 /* vcgtz - scalar. */
18675 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18676 vcgtzs_f32 (float32_t __a)
18678 return __a > 0.0f ? -1 : 0;
18681 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18682 vcgtzd_s64 (int64x1_t __a)
18684 return __a > 0 ? -1ll : 0ll;
18687 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18688 vcgtzd_u64 (int64x1_t __a)
18690 return __a > 0 ? -1ll : 0ll;
18693 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18694 vcgtzd_f64 (float64_t __a)
18696 return __a > 0.0 ? -1ll : 0ll;
/* vcle - vector.
   Per-lane __a <= __b: each result lane is all-ones when the compare
   is true, all-zeros otherwise.  There is no dedicated <= builtin
   here: the operands are swapped and the >= builtins are reused, since
   a <= b is exactly b >= a.  Single-lane 64-bit types are scalar
   typedefs, so those forms are plain C conditionals.  Vector casts
   only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
                                                 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
                                                  (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
                                                   (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
                                                   (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a <= __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
                                                   (int8x16_t) __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
                                                    (int8x16_t) __a);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
                                                   (int16x8_t) __a);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
                                                   (int32x4_t) __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
                                                   (int64x2_t) __a);
}
18842 /* vcle - scalar. */
18844 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18845 vcles_f32 (float32_t __a, float32_t __b)
18847 return __a <= __b ? -1 : 0;
18850 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18851 vcled_s64 (int64x1_t __a, int64x1_t __b)
18853 return __a <= __b ? -1ll : 0ll;
18856 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18857 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
18859 return __a <= __b ? -1ll : 0ll;
18862 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18863 vcled_f64 (float64_t __a, float64_t __b)
18865 return __a <= __b ? -1ll : 0ll;
/* vclez - vector.
   Per-lane __a <= 0: each result lane is all-ones when true, all-zeros
   otherwise.  Uses the dedicated cmle* (compare-less-or-equal)
   builtins against an explicit zero vector.  NOTE(review): unlike the
   other compare-with-zero families, no unsigned 8/16/32-bit forms are
   defined here — only the scalar-typedef u64 form; confirm against
   ACLE whether the others are intentionally absent (for unsigned,
   <= 0 is equivalent to == 0).  Vector casts only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_f64 (float64x1_t __a)
{
  return __a <= 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclez_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclez_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclez_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_s64 (int64x1_t __a)
{
  return __a <= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclez_u64 (uint64x1_t __a)
{
  /* Unsigned <= 0 is equivalent to == 0.  */
  return __a <= 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclezq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclezq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclezq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vclezq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
}
18976 /* vclez - scalar. */
18978 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18979 vclezs_f32 (float32_t __a)
18981 return __a <= 0.0f ? -1 : 0;
18984 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18985 vclezd_s64 (int64x1_t __a)
18987 return __a <= 0 ? -1ll : 0ll;
18990 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18991 vclezd_u64 (int64x1_t __a)
18993 return __a <= 0 ? -1ll : 0ll;
18996 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18997 vclezd_f64 (float64_t __a)
18999 return __a <= 0.0 ? -1ll : 0ll;
/* vclt - vector.
   Per-lane __a < __b: each result lane is all-ones when the compare is
   true, all-zeros otherwise.  There is no dedicated < builtin here:
   the operands are swapped and the > builtins are reused, since
   a < b is exactly b > a.  Single-lane 64-bit types are scalar
   typedefs, so those forms are plain C conditionals.  Vector casts
   only reinterpret bits.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
                                                 (int8x8_t) __a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
                                                  (int8x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
                                                   (int16x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
                                                   (int32x2_t) __a);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a < __b ? -1ll : 0ll;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
                                                   (int8x16_t) __a);
}
19093 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19094 vcltq_s8 (int8x16_t __a, int8x16_t __b)
19096 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
19099 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19100 vcltq_s16 (int16x8_t __a, int16x8_t __b)
19102 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
19105 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19106 vcltq_s32 (int32x4_t __a, int32x4_t __b)
19108 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
19111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19112 vcltq_s64 (int64x2_t __a, int64x2_t __b)
19114 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
19117 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19118 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
19120 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
19121 (int8x16_t) __a);
19124 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19125 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
19127 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
19128 (int16x8_t) __a);
19131 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19132 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
19134 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
19135 (int32x4_t) __a);
19138 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19139 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
19141 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
19142 (int64x2_t) __a);
19145 /* vclt - scalar. */
19147 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19148 vclts_f32 (float32_t __a, float32_t __b)
19150 return __a < __b ? -1 : 0;
19153 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19154 vcltd_s64 (int64x1_t __a, int64x1_t __b)
19156 return __a < __b ? -1ll : 0ll;
19159 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19160 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
19162 return __a < __b ? -1ll : 0ll;
19165 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19166 vcltd_f64 (float64_t __a, float64_t __b)
19168 return __a < __b ? -1ll : 0ll;
19171 /* vcltz - vector. */
19173 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19174 vcltz_f32 (float32x2_t __a)
19176 float32x2_t __b = {0.0f, 0.0f};
19177 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
19180 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19181 vcltz_f64 (float64x1_t __a)
19183 return __a < 0.0 ? -1ll : 0ll;
19186 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19187 vcltz_p8 (poly8x8_t __a)
19189 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19190 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
19191 (int8x8_t) __b);
19194 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19195 vcltz_s8 (int8x8_t __a)
19197 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19198 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
19201 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19202 vcltz_s16 (int16x4_t __a)
19204 int16x4_t __b = {0, 0, 0, 0};
19205 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
19208 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19209 vcltz_s32 (int32x2_t __a)
19211 int32x2_t __b = {0, 0};
19212 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
19215 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19216 vcltz_s64 (int64x1_t __a)
19218 return __a < 0ll ? -1ll : 0ll;
19221 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19222 vcltzq_f32 (float32x4_t __a)
19224 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
19225 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
19228 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19229 vcltzq_f64 (float64x2_t __a)
19231 float64x2_t __b = {0.0, 0.0};
19232 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
19235 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19236 vcltzq_p8 (poly8x16_t __a)
19238 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
19239 0, 0, 0, 0, 0, 0, 0, 0};
19240 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
19241 (int8x16_t) __b);
19244 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19245 vcltzq_s8 (int8x16_t __a)
19247 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
19248 0, 0, 0, 0, 0, 0, 0, 0};
19249 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
19252 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19253 vcltzq_s16 (int16x8_t __a)
19255 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19256 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
19259 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19260 vcltzq_s32 (int32x4_t __a)
19262 int32x4_t __b = {0, 0, 0, 0};
19263 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
19266 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19267 vcltzq_s64 (int64x2_t __a)
19269 int64x2_t __b = {0, 0};
19270 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
19273 /* vcltz - scalar. */
19275 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19276 vcltzs_f32 (float32_t __a)
19278 return __a < 0.0f ? -1 : 0;
19281 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19282 vcltzd_s64 (int64x1_t __a)
19284 return __a < 0 ? -1ll : 0ll;
19287 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19288 vcltzd_u64 (int64x1_t __a)
19290 return __a < 0 ? -1ll : 0ll;
19293 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19294 vcltzd_f64 (float64_t __a)
19296 return __a < 0.0 ? -1ll : 0ll;
19299 /* vcvt (double -> float). */
19301 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19302 vcvt_f32_f64 (float64x2_t __a)
19304 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
19307 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19308 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
19310 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
19313 /* vcvt (float -> double). */
19315 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19316 vcvt_f64_f32 (float32x2_t __a)
19319 return __builtin_aarch64_float_extend_lo_v2df (__a);
19322 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19323 vcvt_high_f64_f32 (float32x4_t __a)
19325 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
19328 /* vcvt (<u>int -> float) */
19330 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19331 vcvtd_f64_s64 (int64_t __a)
19333 return (float64_t) __a;
19336 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19337 vcvtd_f64_u64 (uint64_t __a)
19339 return (float64_t) __a;
19342 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19343 vcvts_f32_s32 (int32_t __a)
19345 return (float32_t) __a;
19348 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19349 vcvts_f32_u32 (uint32_t __a)
19351 return (float32_t) __a;
19354 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19355 vcvt_f32_s32 (int32x2_t __a)
19357 return __builtin_aarch64_floatv2siv2sf (__a);
19360 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19361 vcvt_f32_u32 (uint32x2_t __a)
19363 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
19366 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19367 vcvtq_f32_s32 (int32x4_t __a)
19369 return __builtin_aarch64_floatv4siv4sf (__a);
19372 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19373 vcvtq_f32_u32 (uint32x4_t __a)
19375 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
19378 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19379 vcvtq_f64_s64 (int64x2_t __a)
19381 return __builtin_aarch64_floatv2div2df (__a);
19384 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19385 vcvtq_f64_u64 (uint64x2_t __a)
19387 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
19390 /* vcvt (float -> <u>int) */
19392 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19393 vcvtd_s64_f64 (float64_t __a)
19395 return (int64_t) __a;
19398 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19399 vcvtd_u64_f64 (float64_t __a)
19401 return (uint64_t) __a;
19404 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19405 vcvts_s32_f32 (float32_t __a)
19407 return (int32_t) __a;
19410 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19411 vcvts_u32_f32 (float32_t __a)
19413 return (uint32_t) __a;
19416 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19417 vcvt_s32_f32 (float32x2_t __a)
19419 return __builtin_aarch64_lbtruncv2sfv2si (__a);
19422 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19423 vcvt_u32_f32 (float32x2_t __a)
19425 /* TODO: This cast should go away when builtins have
19426 their correct types. */
19427 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
19430 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19431 vcvtq_s32_f32 (float32x4_t __a)
19433 return __builtin_aarch64_lbtruncv4sfv4si (__a);
19436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19437 vcvtq_u32_f32 (float32x4_t __a)
19439 /* TODO: This cast should go away when builtins have
19440 their correct types. */
19441 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
19444 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19445 vcvtq_s64_f64 (float64x2_t __a)
19447 return __builtin_aarch64_lbtruncv2dfv2di (__a);
19450 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19451 vcvtq_u64_f64 (float64x2_t __a)
19453 /* TODO: This cast should go away when builtins have
19454 their correct types. */
19455 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
19458 /* vcvta */
19460 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19461 vcvtad_s64_f64 (float64_t __a)
19463 return __builtin_aarch64_lrounddfdi (__a);
19466 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19467 vcvtad_u64_f64 (float64_t __a)
19469 return __builtin_aarch64_lroundudfdi (__a);
19472 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19473 vcvtas_s32_f32 (float32_t __a)
19475 return __builtin_aarch64_lroundsfsi (__a);
19478 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19479 vcvtas_u32_f32 (float32_t __a)
19481 return __builtin_aarch64_lroundusfsi (__a);
19484 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19485 vcvta_s32_f32 (float32x2_t __a)
19487 return __builtin_aarch64_lroundv2sfv2si (__a);
19490 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19491 vcvta_u32_f32 (float32x2_t __a)
19493 /* TODO: This cast should go away when builtins have
19494 their correct types. */
19495 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
19498 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19499 vcvtaq_s32_f32 (float32x4_t __a)
19501 return __builtin_aarch64_lroundv4sfv4si (__a);
19504 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19505 vcvtaq_u32_f32 (float32x4_t __a)
19507 /* TODO: This cast should go away when builtins have
19508 their correct types. */
19509 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
19512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19513 vcvtaq_s64_f64 (float64x2_t __a)
19515 return __builtin_aarch64_lroundv2dfv2di (__a);
19518 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19519 vcvtaq_u64_f64 (float64x2_t __a)
19521 /* TODO: This cast should go away when builtins have
19522 their correct types. */
19523 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
19526 /* vcvtm */
19528 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19529 vcvtmd_s64_f64 (float64_t __a)
19531 return __builtin_lfloor (__a);
19534 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19535 vcvtmd_u64_f64 (float64_t __a)
19537 return __builtin_aarch64_lfloorudfdi (__a);
19540 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19541 vcvtms_s32_f32 (float32_t __a)
19543 return __builtin_ifloorf (__a);
19546 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19547 vcvtms_u32_f32 (float32_t __a)
19549 return __builtin_aarch64_lfloorusfsi (__a);
19552 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19553 vcvtm_s32_f32 (float32x2_t __a)
19555 return __builtin_aarch64_lfloorv2sfv2si (__a);
19558 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19559 vcvtm_u32_f32 (float32x2_t __a)
19561 /* TODO: This cast should go away when builtins have
19562 their correct types. */
19563 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
19566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19567 vcvtmq_s32_f32 (float32x4_t __a)
19569 return __builtin_aarch64_lfloorv4sfv4si (__a);
19572 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19573 vcvtmq_u32_f32 (float32x4_t __a)
19575 /* TODO: This cast should go away when builtins have
19576 their correct types. */
19577 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
19580 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19581 vcvtmq_s64_f64 (float64x2_t __a)
19583 return __builtin_aarch64_lfloorv2dfv2di (__a);
19586 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19587 vcvtmq_u64_f64 (float64x2_t __a)
19589 /* TODO: This cast should go away when builtins have
19590 their correct types. */
19591 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
19594 /* vcvtn */
19596 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19597 vcvtnd_s64_f64 (float64_t __a)
19599 return __builtin_aarch64_lfrintndfdi (__a);
19602 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19603 vcvtnd_u64_f64 (float64_t __a)
19605 return __builtin_aarch64_lfrintnudfdi (__a);
19608 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19609 vcvtns_s32_f32 (float32_t __a)
19611 return __builtin_aarch64_lfrintnsfsi (__a);
19614 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19615 vcvtns_u32_f32 (float32_t __a)
19617 return __builtin_aarch64_lfrintnusfsi (__a);
19620 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19621 vcvtn_s32_f32 (float32x2_t __a)
19623 return __builtin_aarch64_lfrintnv2sfv2si (__a);
19626 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19627 vcvtn_u32_f32 (float32x2_t __a)
19629 /* TODO: This cast should go away when builtins have
19630 their correct types. */
19631 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
19634 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19635 vcvtnq_s32_f32 (float32x4_t __a)
19637 return __builtin_aarch64_lfrintnv4sfv4si (__a);
19640 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19641 vcvtnq_u32_f32 (float32x4_t __a)
19643 /* TODO: This cast should go away when builtins have
19644 their correct types. */
19645 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
19648 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19649 vcvtnq_s64_f64 (float64x2_t __a)
19651 return __builtin_aarch64_lfrintnv2dfv2di (__a);
19654 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19655 vcvtnq_u64_f64 (float64x2_t __a)
19657 /* TODO: This cast should go away when builtins have
19658 their correct types. */
19659 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
19662 /* vcvtp */
19664 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19665 vcvtpd_s64_f64 (float64_t __a)
19667 return __builtin_lceil (__a);
19670 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19671 vcvtpd_u64_f64 (float64_t __a)
19673 return __builtin_aarch64_lceiludfdi (__a);
19676 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19677 vcvtps_s32_f32 (float32_t __a)
19679 return __builtin_iceilf (__a);
19682 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19683 vcvtps_u32_f32 (float32_t __a)
19685 return __builtin_aarch64_lceilusfsi (__a);
19688 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19689 vcvtp_s32_f32 (float32x2_t __a)
19691 return __builtin_aarch64_lceilv2sfv2si (__a);
19694 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19695 vcvtp_u32_f32 (float32x2_t __a)
19697 /* TODO: This cast should go away when builtins have
19698 their correct types. */
19699 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
19702 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19703 vcvtpq_s32_f32 (float32x4_t __a)
19705 return __builtin_aarch64_lceilv4sfv4si (__a);
19708 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19709 vcvtpq_u32_f32 (float32x4_t __a)
19711 /* TODO: This cast should go away when builtins have
19712 their correct types. */
19713 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
19716 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19717 vcvtpq_s64_f64 (float64x2_t __a)
19719 return __builtin_aarch64_lceilv2dfv2di (__a);
19722 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19723 vcvtpq_u64_f64 (float64x2_t __a)
19725 /* TODO: This cast should go away when builtins have
19726 their correct types. */
19727 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
19730 /* vdup */
19732 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19733 vdupb_lane_s8 (int8x16_t a, int const b)
19735 return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
19738 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19739 vdupb_lane_u8 (uint8x16_t a, int const b)
19741 return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
19744 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19745 vduph_lane_s16 (int16x8_t a, int const b)
19747 return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
19750 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19751 vduph_lane_u16 (uint16x8_t a, int const b)
19753 return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
19756 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19757 vdups_lane_s32 (int32x4_t a, int const b)
19759 return __builtin_aarch64_dup_lane_scalarv4si (a, b);
19762 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19763 vdups_lane_u32 (uint32x4_t a, int const b)
19765 return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
19768 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19769 vdupd_lane_s64 (int64x2_t a, int const b)
19771 return __builtin_aarch64_dup_lane_scalarv2di (a, b);
19774 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19775 vdupd_lane_u64 (uint64x2_t a, int const b)
19777 return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
19780 /* vld1 */
19782 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19783 vld1_f32 (const float32_t *a)
19785 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
19788 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19789 vld1_f64 (const float64_t *a)
19791 return *a;
19794 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19795 vld1_p8 (const poly8_t *a)
19797 return (poly8x8_t)
19798 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19801 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
19802 vld1_p16 (const poly16_t *a)
19804 return (poly16x4_t)
19805 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19808 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19809 vld1_s8 (const int8_t *a)
19811 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19814 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19815 vld1_s16 (const int16_t *a)
19817 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19820 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19821 vld1_s32 (const int32_t *a)
19823 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
19826 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19827 vld1_s64 (const int64_t *a)
19829 return *a;
19832 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19833 vld1_u8 (const uint8_t *a)
19835 return (uint8x8_t)
19836 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19839 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19840 vld1_u16 (const uint16_t *a)
19842 return (uint16x4_t)
19843 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19846 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19847 vld1_u32 (const uint32_t *a)
19849 return (uint32x2_t)
19850 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
19853 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19854 vld1_u64 (const uint64_t *a)
19856 return *a;
19859 /* vld1q */
19861 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19862 vld1q_f32 (const float32_t *a)
19864 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
19867 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19868 vld1q_f64 (const float64_t *a)
19870 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
19873 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19874 vld1q_p8 (const poly8_t *a)
19876 return (poly8x16_t)
19877 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19880 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
19881 vld1q_p16 (const poly16_t *a)
19883 return (poly16x8_t)
19884 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19887 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19888 vld1q_s8 (const int8_t *a)
19890 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19893 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19894 vld1q_s16 (const int16_t *a)
19896 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19899 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19900 vld1q_s32 (const int32_t *a)
19902 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
19905 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19906 vld1q_s64 (const int64_t *a)
19908 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
19911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19912 vld1q_u8 (const uint8_t *a)
19914 return (uint8x16_t)
19915 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19918 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19919 vld1q_u16 (const uint16_t *a)
19921 return (uint16x8_t)
19922 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19925 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19926 vld1q_u32 (const uint32_t *a)
19928 return (uint32x4_t)
19929 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
19932 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19933 vld1q_u64 (const uint64_t *a)
19935 return (uint64x2_t)
19936 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
19939 /* vldn */
19941 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
19942 vld2_s64 (const int64_t * __a)
19944 int64x1x2_t ret;
19945 __builtin_aarch64_simd_oi __o;
19946 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
19947 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
19948 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
19949 return ret;
19952 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
19953 vld2_u64 (const uint64_t * __a)
19955 uint64x1x2_t ret;
19956 __builtin_aarch64_simd_oi __o;
19957 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
19958 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
19959 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
19960 return ret;
19963 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
19964 vld2_f64 (const float64_t * __a)
19966 float64x1x2_t ret;
19967 __builtin_aarch64_simd_oi __o;
19968 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
19969 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
19970 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
19971 return ret;
19974 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
19975 vld2_s8 (const int8_t * __a)
19977 int8x8x2_t ret;
19978 __builtin_aarch64_simd_oi __o;
19979 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
19980 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
19981 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
19982 return ret;
19985 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
19986 vld2_p8 (const poly8_t * __a)
19988 poly8x8x2_t ret;
19989 __builtin_aarch64_simd_oi __o;
19990 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
19991 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
19992 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
19993 return ret;
19996 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
19997 vld2_s16 (const int16_t * __a)
19999 int16x4x2_t ret;
20000 __builtin_aarch64_simd_oi __o;
20001 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20002 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20003 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20004 return ret;
20007 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
20008 vld2_p16 (const poly16_t * __a)
20010 poly16x4x2_t ret;
20011 __builtin_aarch64_simd_oi __o;
20012 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20013 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20014 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20015 return ret;
20018 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
20019 vld2_s32 (const int32_t * __a)
20021 int32x2x2_t ret;
20022 __builtin_aarch64_simd_oi __o;
20023 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20024 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20025 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20026 return ret;
20029 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
20030 vld2_u8 (const uint8_t * __a)
20032 uint8x8x2_t ret;
20033 __builtin_aarch64_simd_oi __o;
20034 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20035 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20036 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20037 return ret;
20040 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
20041 vld2_u16 (const uint16_t * __a)
20043 uint16x4x2_t ret;
20044 __builtin_aarch64_simd_oi __o;
20045 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20046 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20047 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20048 return ret;
20051 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
20052 vld2_u32 (const uint32_t * __a)
20054 uint32x2x2_t ret;
20055 __builtin_aarch64_simd_oi __o;
20056 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20057 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20058 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20059 return ret;
20062 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
20063 vld2_f32 (const float32_t * __a)
20065 float32x2x2_t ret;
20066 __builtin_aarch64_simd_oi __o;
20067 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
20068 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
20069 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
20070 return ret;
20073 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
20074 vld2q_s8 (const int8_t * __a)
20076 int8x16x2_t ret;
20077 __builtin_aarch64_simd_oi __o;
20078 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20079 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20080 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20081 return ret;
20084 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
20085 vld2q_p8 (const poly8_t * __a)
20087 poly8x16x2_t ret;
20088 __builtin_aarch64_simd_oi __o;
20089 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20090 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20091 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20092 return ret;
20095 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
20096 vld2q_s16 (const int16_t * __a)
20098 int16x8x2_t ret;
20099 __builtin_aarch64_simd_oi __o;
20100 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20101 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20102 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20103 return ret;
20106 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
20107 vld2q_p16 (const poly16_t * __a)
20109 poly16x8x2_t ret;
20110 __builtin_aarch64_simd_oi __o;
20111 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20112 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20113 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20114 return ret;
20117 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
20118 vld2q_s32 (const int32_t * __a)
20120 int32x4x2_t ret;
20121 __builtin_aarch64_simd_oi __o;
20122 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20123 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20124 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20125 return ret;
20128 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
20129 vld2q_s64 (const int64_t * __a)
20131 int64x2x2_t ret;
20132 __builtin_aarch64_simd_oi __o;
20133 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20134 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20135 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20136 return ret;
20139 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
20140 vld2q_u8 (const uint8_t * __a)
20142 uint8x16x2_t ret;
20143 __builtin_aarch64_simd_oi __o;
20144 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20145 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20146 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20147 return ret;
20150 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
20151 vld2q_u16 (const uint16_t * __a)
20153 uint16x8x2_t ret;
20154 __builtin_aarch64_simd_oi __o;
20155 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20156 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20157 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20158 return ret;
20161 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
20162 vld2q_u32 (const uint32_t * __a)
20164 uint32x4x2_t ret;
20165 __builtin_aarch64_simd_oi __o;
20166 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20167 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20168 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20169 return ret;
20172 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
20173 vld2q_u64 (const uint64_t * __a)
20175 uint64x2x2_t ret;
20176 __builtin_aarch64_simd_oi __o;
20177 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20178 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20179 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20180 return ret;
20183 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
20184 vld2q_f32 (const float32_t * __a)
20186 float32x4x2_t ret;
20187 __builtin_aarch64_simd_oi __o;
20188 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
20189 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
20190 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
20191 return ret;
20194 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
20195 vld2q_f64 (const float64_t * __a)
20197 float64x2x2_t ret;
20198 __builtin_aarch64_simd_oi __o;
20199 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
20200 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
20201 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
20202 return ret;
20205 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
20206 vld3_s64 (const int64_t * __a)
20208 int64x1x3_t ret;
20209 __builtin_aarch64_simd_ci __o;
20210 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20211 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20212 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20213 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20214 return ret;
20217 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
20218 vld3_u64 (const uint64_t * __a)
20220 uint64x1x3_t ret;
20221 __builtin_aarch64_simd_ci __o;
20222 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20223 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20224 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20225 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20226 return ret;
20229 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
20230 vld3_f64 (const float64_t * __a)
20232 float64x1x3_t ret;
20233 __builtin_aarch64_simd_ci __o;
20234 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
20235 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
20236 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
20237 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
20238 return ret;
20241 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
20242 vld3_s8 (const int8_t * __a)
20244 int8x8x3_t ret;
20245 __builtin_aarch64_simd_ci __o;
20246 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20247 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20248 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20249 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20250 return ret;
20253 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
20254 vld3_p8 (const poly8_t * __a)
20256 poly8x8x3_t ret;
20257 __builtin_aarch64_simd_ci __o;
20258 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20259 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20260 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20261 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20262 return ret;
20265 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
20266 vld3_s16 (const int16_t * __a)
20268 int16x4x3_t ret;
20269 __builtin_aarch64_simd_ci __o;
20270 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20271 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20272 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20273 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20274 return ret;
20277 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
20278 vld3_p16 (const poly16_t * __a)
20280 poly16x4x3_t ret;
20281 __builtin_aarch64_simd_ci __o;
20282 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20283 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20284 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20285 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20286 return ret;
20289 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
20290 vld3_s32 (const int32_t * __a)
20292 int32x2x3_t ret;
20293 __builtin_aarch64_simd_ci __o;
20294 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20295 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20296 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20297 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20298 return ret;
20301 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
20302 vld3_u8 (const uint8_t * __a)
20304 uint8x8x3_t ret;
20305 __builtin_aarch64_simd_ci __o;
20306 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20307 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20308 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20309 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20310 return ret;
20313 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
20314 vld3_u16 (const uint16_t * __a)
20316 uint16x4x3_t ret;
20317 __builtin_aarch64_simd_ci __o;
20318 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20319 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20320 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20321 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20322 return ret;
20325 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
20326 vld3_u32 (const uint32_t * __a)
20328 uint32x2x3_t ret;
20329 __builtin_aarch64_simd_ci __o;
20330 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20331 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20332 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20333 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20334 return ret;
20337 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
20338 vld3_f32 (const float32_t * __a)
20340 float32x2x3_t ret;
20341 __builtin_aarch64_simd_ci __o;
20342 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
20343 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
20344 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
20345 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
20346 return ret;
20349 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
20350 vld3q_s8 (const int8_t * __a)
20352 int8x16x3_t ret;
20353 __builtin_aarch64_simd_ci __o;
20354 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20355 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20356 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20357 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20358 return ret;
20361 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
20362 vld3q_p8 (const poly8_t * __a)
20364 poly8x16x3_t ret;
20365 __builtin_aarch64_simd_ci __o;
20366 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20367 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20368 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20369 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20370 return ret;
20373 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
20374 vld3q_s16 (const int16_t * __a)
20376 int16x8x3_t ret;
20377 __builtin_aarch64_simd_ci __o;
20378 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20379 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20380 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20381 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20382 return ret;
20385 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
20386 vld3q_p16 (const poly16_t * __a)
20388 poly16x8x3_t ret;
20389 __builtin_aarch64_simd_ci __o;
20390 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20391 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20392 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20393 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20394 return ret;
20397 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
20398 vld3q_s32 (const int32_t * __a)
20400 int32x4x3_t ret;
20401 __builtin_aarch64_simd_ci __o;
20402 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20403 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20404 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20405 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20406 return ret;
20409 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
20410 vld3q_s64 (const int64_t * __a)
20412 int64x2x3_t ret;
20413 __builtin_aarch64_simd_ci __o;
20414 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20415 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20416 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20417 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20418 return ret;
20421 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
20422 vld3q_u8 (const uint8_t * __a)
20424 uint8x16x3_t ret;
20425 __builtin_aarch64_simd_ci __o;
20426 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20427 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20428 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20429 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20430 return ret;
20433 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
20434 vld3q_u16 (const uint16_t * __a)
20436 uint16x8x3_t ret;
20437 __builtin_aarch64_simd_ci __o;
20438 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20439 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20440 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20441 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20442 return ret;
20445 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
20446 vld3q_u32 (const uint32_t * __a)
20448 uint32x4x3_t ret;
20449 __builtin_aarch64_simd_ci __o;
20450 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20451 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20452 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20453 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20454 return ret;
20457 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
20458 vld3q_u64 (const uint64_t * __a)
20460 uint64x2x3_t ret;
20461 __builtin_aarch64_simd_ci __o;
20462 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20463 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20464 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20465 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20466 return ret;
20469 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
20470 vld3q_f32 (const float32_t * __a)
20472 float32x4x3_t ret;
20473 __builtin_aarch64_simd_ci __o;
20474 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
20475 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
20476 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
20477 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
20478 return ret;
20481 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
20482 vld3q_f64 (const float64_t * __a)
20484 float64x2x3_t ret;
20485 __builtin_aarch64_simd_ci __o;
20486 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
20487 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
20488 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
20489 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
20490 return ret;
20493 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
20494 vld4_s64 (const int64_t * __a)
20496 int64x1x4_t ret;
20497 __builtin_aarch64_simd_xi __o;
20498 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20499 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20500 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20501 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20502 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20503 return ret;
20506 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
20507 vld4_u64 (const uint64_t * __a)
20509 uint64x1x4_t ret;
20510 __builtin_aarch64_simd_xi __o;
20511 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20512 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20513 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20514 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20515 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20516 return ret;
20519 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
20520 vld4_f64 (const float64_t * __a)
20522 float64x1x4_t ret;
20523 __builtin_aarch64_simd_xi __o;
20524 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
20525 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
20526 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
20527 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
20528 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
20529 return ret;
20532 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
20533 vld4_s8 (const int8_t * __a)
20535 int8x8x4_t ret;
20536 __builtin_aarch64_simd_xi __o;
20537 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20538 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20539 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20540 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20541 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20542 return ret;
20545 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
20546 vld4_p8 (const poly8_t * __a)
20548 poly8x8x4_t ret;
20549 __builtin_aarch64_simd_xi __o;
20550 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20551 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20552 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20553 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20554 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20555 return ret;
20558 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
20559 vld4_s16 (const int16_t * __a)
20561 int16x4x4_t ret;
20562 __builtin_aarch64_simd_xi __o;
20563 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20564 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20565 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20566 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20567 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20568 return ret;
20571 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
20572 vld4_p16 (const poly16_t * __a)
20574 poly16x4x4_t ret;
20575 __builtin_aarch64_simd_xi __o;
20576 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20577 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20578 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20579 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20580 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20581 return ret;
20584 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
20585 vld4_s32 (const int32_t * __a)
20587 int32x2x4_t ret;
20588 __builtin_aarch64_simd_xi __o;
20589 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20590 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20591 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20592 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20593 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20594 return ret;
20597 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
20598 vld4_u8 (const uint8_t * __a)
20600 uint8x8x4_t ret;
20601 __builtin_aarch64_simd_xi __o;
20602 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20603 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20604 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20605 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20606 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20607 return ret;
20610 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
20611 vld4_u16 (const uint16_t * __a)
20613 uint16x4x4_t ret;
20614 __builtin_aarch64_simd_xi __o;
20615 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20616 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20617 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20618 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20619 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20620 return ret;
20623 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
20624 vld4_u32 (const uint32_t * __a)
20626 uint32x2x4_t ret;
20627 __builtin_aarch64_simd_xi __o;
20628 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20629 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20630 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20631 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20632 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20633 return ret;
20636 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
20637 vld4_f32 (const float32_t * __a)
20639 float32x2x4_t ret;
20640 __builtin_aarch64_simd_xi __o;
20641 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
20642 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
20643 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
20644 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
20645 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
20646 return ret;
20649 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
20650 vld4q_s8 (const int8_t * __a)
20652 int8x16x4_t ret;
20653 __builtin_aarch64_simd_xi __o;
20654 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20655 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20656 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20657 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20658 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20659 return ret;
20662 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
20663 vld4q_p8 (const poly8_t * __a)
20665 poly8x16x4_t ret;
20666 __builtin_aarch64_simd_xi __o;
20667 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20668 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20669 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20670 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20671 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20672 return ret;
20675 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
20676 vld4q_s16 (const int16_t * __a)
20678 int16x8x4_t ret;
20679 __builtin_aarch64_simd_xi __o;
20680 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20681 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20682 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20683 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20684 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20685 return ret;
20688 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
20689 vld4q_p16 (const poly16_t * __a)
20691 poly16x8x4_t ret;
20692 __builtin_aarch64_simd_xi __o;
20693 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20694 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20695 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20696 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20697 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20698 return ret;
20701 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
20702 vld4q_s32 (const int32_t * __a)
20704 int32x4x4_t ret;
20705 __builtin_aarch64_simd_xi __o;
20706 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20707 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20708 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20709 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20710 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20711 return ret;
20714 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
20715 vld4q_s64 (const int64_t * __a)
20717 int64x2x4_t ret;
20718 __builtin_aarch64_simd_xi __o;
20719 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20720 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20721 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20722 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20723 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20724 return ret;
20727 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
20728 vld4q_u8 (const uint8_t * __a)
20730 uint8x16x4_t ret;
20731 __builtin_aarch64_simd_xi __o;
20732 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20733 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20734 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20735 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20736 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20737 return ret;
20740 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
20741 vld4q_u16 (const uint16_t * __a)
20743 uint16x8x4_t ret;
20744 __builtin_aarch64_simd_xi __o;
20745 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20746 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20747 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20748 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20749 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20750 return ret;
20753 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
20754 vld4q_u32 (const uint32_t * __a)
20756 uint32x4x4_t ret;
20757 __builtin_aarch64_simd_xi __o;
20758 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20759 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20760 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20761 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20762 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20763 return ret;
20766 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
20767 vld4q_u64 (const uint64_t * __a)
20769 uint64x2x4_t ret;
20770 __builtin_aarch64_simd_xi __o;
20771 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20772 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20773 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20774 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20775 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20776 return ret;
20779 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
20780 vld4q_f32 (const float32_t * __a)
20782 float32x4x4_t ret;
20783 __builtin_aarch64_simd_xi __o;
20784 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
20785 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
20786 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
20787 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
20788 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
20789 return ret;
20792 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
20793 vld4q_f64 (const float64_t * __a)
20795 float64x2x4_t ret;
20796 __builtin_aarch64_simd_xi __o;
20797 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
20798 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
20799 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
20800 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
20801 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
20802 return ret;
20805 /* vmax */
20807 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20808 vmax_f32 (float32x2_t __a, float32x2_t __b)
20810 return __builtin_aarch64_smax_nanv2sf (__a, __b);
20813 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20814 vmax_s8 (int8x8_t __a, int8x8_t __b)
20816 return __builtin_aarch64_smaxv8qi (__a, __b);
20819 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20820 vmax_s16 (int16x4_t __a, int16x4_t __b)
20822 return __builtin_aarch64_smaxv4hi (__a, __b);
20825 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20826 vmax_s32 (int32x2_t __a, int32x2_t __b)
20828 return __builtin_aarch64_smaxv2si (__a, __b);
20831 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20832 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
20834 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
20835 (int8x8_t) __b);
20838 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20839 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
20841 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
20842 (int16x4_t) __b);
20845 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20846 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
20848 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
20849 (int32x2_t) __b);
20852 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20853 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
20855 return __builtin_aarch64_smax_nanv4sf (__a, __b);
20858 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20859 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
20861 return __builtin_aarch64_smax_nanv2df (__a, __b);
20864 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20865 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
20867 return __builtin_aarch64_smaxv16qi (__a, __b);
20870 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20871 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
20873 return __builtin_aarch64_smaxv8hi (__a, __b);
20876 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20877 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
20879 return __builtin_aarch64_smaxv4si (__a, __b);
20882 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20883 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
20885 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
20886 (int8x16_t) __b);
20889 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20890 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
20892 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
20893 (int16x8_t) __b);
20896 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20897 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
20899 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
20900 (int32x4_t) __b);
20903 /* vmaxnm */
20905 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20906 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
20908 return __builtin_aarch64_smaxv2sf (__a, __b);
20911 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20912 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
20914 return __builtin_aarch64_smaxv4sf (__a, __b);
20917 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20918 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
20920 return __builtin_aarch64_smaxv2df (__a, __b);
20923 /* vmaxv */
20925 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20926 vmaxv_f32 (float32x2_t __a)
20928 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
20931 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20932 vmaxv_s8 (int8x8_t __a)
20934 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
20937 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20938 vmaxv_s16 (int16x4_t __a)
20940 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
20943 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20944 vmaxv_s32 (int32x2_t __a)
20946 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
20949 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20950 vmaxv_u8 (uint8x8_t __a)
20952 return vget_lane_u8 ((uint8x8_t)
20953 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
20956 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
20957 vmaxv_u16 (uint16x4_t __a)
20959 return vget_lane_u16 ((uint16x4_t)
20960 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
20963 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
20964 vmaxv_u32 (uint32x2_t __a)
20966 return vget_lane_u32 ((uint32x2_t)
20967 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
20970 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20971 vmaxvq_f32 (float32x4_t __a)
20973 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
20976 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
20977 vmaxvq_f64 (float64x2_t __a)
20979 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
20982 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20983 vmaxvq_s8 (int8x16_t __a)
20985 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
20988 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20989 vmaxvq_s16 (int16x8_t __a)
20991 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
20994 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20995 vmaxvq_s32 (int32x4_t __a)
20997 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
21000 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21001 vmaxvq_u8 (uint8x16_t __a)
21003 return vgetq_lane_u8 ((uint8x16_t)
21004 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
21007 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21008 vmaxvq_u16 (uint16x8_t __a)
21010 return vgetq_lane_u16 ((uint16x8_t)
21011 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
21014 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21015 vmaxvq_u32 (uint32x4_t __a)
21017 return vgetq_lane_u32 ((uint32x4_t)
21018 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
21021 /* vmaxnmv */
21023 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21024 vmaxnmv_f32 (float32x2_t __a)
21026 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
21029 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21030 vmaxnmvq_f32 (float32x4_t __a)
21032 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
21035 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21036 vmaxnmvq_f64 (float64x2_t __a)
21038 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
21041 /* vmin */
21043 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21044 vmin_f32 (float32x2_t __a, float32x2_t __b)
21046 return __builtin_aarch64_smin_nanv2sf (__a, __b);
21049 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21050 vmin_s8 (int8x8_t __a, int8x8_t __b)
21052 return __builtin_aarch64_sminv8qi (__a, __b);
21055 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21056 vmin_s16 (int16x4_t __a, int16x4_t __b)
21058 return __builtin_aarch64_sminv4hi (__a, __b);
21061 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21062 vmin_s32 (int32x2_t __a, int32x2_t __b)
21064 return __builtin_aarch64_sminv2si (__a, __b);
21067 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21068 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
21070 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
21071 (int8x8_t) __b);
21074 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21075 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
21077 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
21078 (int16x4_t) __b);
21081 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21082 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
21084 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
21085 (int32x2_t) __b);
21088 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21089 vminq_f32 (float32x4_t __a, float32x4_t __b)
21091 return __builtin_aarch64_smin_nanv4sf (__a, __b);
21094 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21095 vminq_f64 (float64x2_t __a, float64x2_t __b)
21097 return __builtin_aarch64_smin_nanv2df (__a, __b);
21100 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21101 vminq_s8 (int8x16_t __a, int8x16_t __b)
21103 return __builtin_aarch64_sminv16qi (__a, __b);
21106 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21107 vminq_s16 (int16x8_t __a, int16x8_t __b)
21109 return __builtin_aarch64_sminv8hi (__a, __b);
21112 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21113 vminq_s32 (int32x4_t __a, int32x4_t __b)
21115 return __builtin_aarch64_sminv4si (__a, __b);
21118 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21119 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
21121 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
21122 (int8x16_t) __b);
21125 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21126 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
21128 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
21129 (int16x8_t) __b);
21132 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21133 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
21135 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
21136 (int32x4_t) __b);
21139 /* vminnm */
21141 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21142 vminnm_f32 (float32x2_t __a, float32x2_t __b)
21144 return __builtin_aarch64_sminv2sf (__a, __b);
21147 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21148 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
21150 return __builtin_aarch64_sminv4sf (__a, __b);
21153 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21154 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
21156 return __builtin_aarch64_sminv2df (__a, __b);
21159 /* vminv */
21161 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21162 vminv_f32 (float32x2_t __a)
21164 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
21167 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21168 vminv_s8 (int8x8_t __a)
21170 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
21173 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21174 vminv_s16 (int16x4_t __a)
21176 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
21179 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21180 vminv_s32 (int32x2_t __a)
21182 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
21185 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21186 vminv_u8 (uint8x8_t __a)
21188 return vget_lane_u8 ((uint8x8_t)
21189 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
21192 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21193 vminv_u16 (uint16x4_t __a)
21195 return vget_lane_u16 ((uint16x4_t)
21196 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
21199 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21200 vminv_u32 (uint32x2_t __a)
21202 return vget_lane_u32 ((uint32x2_t)
21203 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
21206 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21207 vminvq_f32 (float32x4_t __a)
21209 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
21212 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21213 vminvq_f64 (float64x2_t __a)
21215 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
21218 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21219 vminvq_s8 (int8x16_t __a)
21221 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
21224 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21225 vminvq_s16 (int16x8_t __a)
21227 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
21230 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21231 vminvq_s32 (int32x4_t __a)
21233 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
21236 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21237 vminvq_u8 (uint8x16_t __a)
21239 return vgetq_lane_u8 ((uint8x16_t)
21240 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
21243 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21244 vminvq_u16 (uint16x8_t __a)
21246 return vgetq_lane_u16 ((uint16x8_t)
21247 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
21250 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21251 vminvq_u32 (uint32x4_t __a)
21253 return vgetq_lane_u32 ((uint32x4_t)
21254 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
21257 /* vminnmv */
21259 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21260 vminnmv_f32 (float32x2_t __a)
21262 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
21265 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21266 vminnmvq_f32 (float32x4_t __a)
21268 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
21271 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21272 vminnmvq_f64 (float64x2_t __a)
21274 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
21277 /* vmla */
21279 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21280 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21282 return a + b * c;
21285 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21286 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21288 return a + b * c;
21291 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21292 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21294 return a + b * c;
21297 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21298 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21300 return a - b * c;
21303 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21304 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21306 return a - b * c;
21309 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21310 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21312 return a - b * c;
21315 /* vqabs */
21317 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21318 vqabsq_s64 (int64x2_t __a)
21320 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21323 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21324 vqabsb_s8 (int8x1_t __a)
21326 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21329 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21330 vqabsh_s16 (int16x1_t __a)
21332 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21335 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21336 vqabss_s32 (int32x1_t __a)
21338 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21341 /* vqadd */
21343 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21344 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21346 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21349 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21350 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21352 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21355 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21356 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21358 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21361 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21362 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21364 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21367 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21368 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21370 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21373 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21374 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21376 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21379 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21380 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21382 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21385 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21386 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21388 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21391 /* vqdmlal */
21393 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21394 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21396 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21400 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21402 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21406 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21407 int const __d)
21409 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21412 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21413 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21414 int const __d)
21416 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21419 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21420 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21422 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21425 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21426 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21428 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21429 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21432 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21433 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21435 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21438 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21439 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21441 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21444 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21445 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21447 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21450 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21451 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21453 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21456 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21457 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21458 int const __d)
21460 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21463 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21464 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21465 int const __d)
21467 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21470 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21471 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21473 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21476 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21477 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21479 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21480 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21483 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21484 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21486 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21489 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21490 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21492 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21495 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21496 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21498 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21501 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21502 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21504 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21507 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21508 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21510 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21513 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21514 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21516 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21519 /* vqdmlsl */
21521 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21522 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21524 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21527 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21528 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21530 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21533 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21534 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21535 int const __d)
21537 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21540 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21541 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21542 int const __d)
21544 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21547 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21548 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21550 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21553 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21554 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21556 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21557 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21560 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21561 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21563 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21567 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21569 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21572 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21573 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21575 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21578 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21579 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21581 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21584 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21585 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21586 int const __d)
21588 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21591 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21592 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21593 int const __d)
21595 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21598 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21599 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21601 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21604 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21605 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21607 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21608 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21611 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21612 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21614 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21617 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21618 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21620 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21623 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21624 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21626 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21629 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21630 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21632 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21635 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21636 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21638 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21641 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21642 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21644 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21647 /* vqdmulh */
21649 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21650 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21652 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21655 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21656 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21658 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21661 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21662 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21664 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21667 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21668 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21670 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21673 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21674 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21676 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21679 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21680 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21682 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21685 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21686 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21688 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21691 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21692 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21694 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21697 /* vqdmull */
21699 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21700 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21702 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21705 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21706 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21708 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21711 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21712 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21714 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21717 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21718 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21720 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21723 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21724 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21726 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21729 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21730 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21732 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
21733 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21736 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21737 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21739 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21742 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21743 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21745 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21748 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21749 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21751 return __builtin_aarch64_sqdmullv2si (__a, __b);
21754 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21755 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21757 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21760 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21761 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21763 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21766 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21767 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21769 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21772 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21773 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21775 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21778 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21779 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21781 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
21782 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21785 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21786 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21788 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21791 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21792 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21794 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21797 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21798 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21800 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21803 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21804 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21806 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21809 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21810 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21812 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21815 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21816 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21818 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21821 /* vqmovn */
21823 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21824 vqmovn_s16 (int16x8_t __a)
21826 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21829 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21830 vqmovn_s32 (int32x4_t __a)
21832 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21835 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21836 vqmovn_s64 (int64x2_t __a)
21838 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21841 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21842 vqmovn_u16 (uint16x8_t __a)
21844 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21847 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21848 vqmovn_u32 (uint32x4_t __a)
21850 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21853 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21854 vqmovn_u64 (uint64x2_t __a)
21856 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21859 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21860 vqmovnh_s16 (int16x1_t __a)
21862 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21865 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21866 vqmovns_s32 (int32x1_t __a)
21868 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21871 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21872 vqmovnd_s64 (int64x1_t __a)
21874 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21877 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21878 vqmovnh_u16 (uint16x1_t __a)
21880 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21883 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21884 vqmovns_u32 (uint32x1_t __a)
21886 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21889 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21890 vqmovnd_u64 (uint64x1_t __a)
21892 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21895 /* vqmovun */
21897 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21898 vqmovun_s16 (int16x8_t __a)
21900 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21903 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21904 vqmovun_s32 (int32x4_t __a)
21906 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21909 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21910 vqmovun_s64 (int64x2_t __a)
21912 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21915 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21916 vqmovunh_s16 (int16x1_t __a)
21918 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21921 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21922 vqmovuns_s32 (int32x1_t __a)
21924 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21927 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21928 vqmovund_s64 (int64x1_t __a)
21930 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21933 /* vqneg */
21935 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21936 vqnegq_s64 (int64x2_t __a)
21938 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21941 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21942 vqnegb_s8 (int8x1_t __a)
21944 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21947 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21948 vqnegh_s16 (int16x1_t __a)
21950 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
21953 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21954 vqnegs_s32 (int32x1_t __a)
21956 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
21959 /* vqrdmulh */
21961 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21962 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21964 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
21967 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21968 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21970 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
21973 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21974 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21976 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
21979 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21980 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21982 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
21985 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21986 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21988 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
21991 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21992 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21994 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
21997 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21998 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
22000 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
22003 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22004 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
22006 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
22009 /* vqrshl */
22011 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22012 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
22014 return __builtin_aarch64_sqrshlv8qi (__a, __b);
22017 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22018 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
22020 return __builtin_aarch64_sqrshlv4hi (__a, __b);
22023 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22024 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
22026 return __builtin_aarch64_sqrshlv2si (__a, __b);
22029 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22030 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
22032 return __builtin_aarch64_sqrshldi (__a, __b);
22035 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22036 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
22038 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
22041 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22042 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
22044 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
22047 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22048 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
22050 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
22053 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22054 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
22056 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
22059 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22060 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
22062 return __builtin_aarch64_sqrshlv16qi (__a, __b);
22065 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22066 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
22068 return __builtin_aarch64_sqrshlv8hi (__a, __b);
22071 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22072 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
22074 return __builtin_aarch64_sqrshlv4si (__a, __b);
22077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22078 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
22080 return __builtin_aarch64_sqrshlv2di (__a, __b);
22083 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22084 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22086 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
22089 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22090 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22092 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
22095 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22096 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22098 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
22101 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22102 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22104 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
22107 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22108 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
22110 return __builtin_aarch64_sqrshlqi (__a, __b);
22113 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22114 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
22116 return __builtin_aarch64_sqrshlhi (__a, __b);
22119 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22120 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
22122 return __builtin_aarch64_sqrshlsi (__a, __b);
22125 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22126 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
22128 return __builtin_aarch64_sqrshldi (__a, __b);
22131 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22132 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22134 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
22137 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22138 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22140 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
22143 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22144 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
22146 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
22149 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22150 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22152 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
22155 /* vqrshrn */
22157 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22158 vqrshrn_n_s16 (int16x8_t __a, const int __b)
22160 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
22163 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22164 vqrshrn_n_s32 (int32x4_t __a, const int __b)
22166 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
22169 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22170 vqrshrn_n_s64 (int64x2_t __a, const int __b)
22172 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
22175 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22176 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
22178 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
22181 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22182 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
22184 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
22187 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22188 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
22190 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
22193 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22194 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
22196 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
22199 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22200 vqrshrns_n_s32 (int32x1_t __a, const int __b)
22202 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
22205 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22206 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
22208 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
22211 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22212 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
22214 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
22217 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22218 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
22220 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
22223 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22224 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
22226 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
22229 /* vqrshrun */
22231 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22232 vqrshrun_n_s16 (int16x8_t __a, const int __b)
22234 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
22237 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22238 vqrshrun_n_s32 (int32x4_t __a, const int __b)
22240 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
22243 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22244 vqrshrun_n_s64 (int64x2_t __a, const int __b)
22246 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
22249 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22250 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
22252 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
22255 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22256 vqrshruns_n_s32 (int32x1_t __a, const int __b)
22258 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
22261 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22262 vqrshrund_n_s64 (int64x1_t __a, const int __b)
22264 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
22267 /* vqshl */
22269 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22270 vqshl_s8 (int8x8_t __a, int8x8_t __b)
22272 return __builtin_aarch64_sqshlv8qi (__a, __b);
22275 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22276 vqshl_s16 (int16x4_t __a, int16x4_t __b)
22278 return __builtin_aarch64_sqshlv4hi (__a, __b);
22281 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22282 vqshl_s32 (int32x2_t __a, int32x2_t __b)
22284 return __builtin_aarch64_sqshlv2si (__a, __b);
22287 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22288 vqshl_s64 (int64x1_t __a, int64x1_t __b)
22290 return __builtin_aarch64_sqshldi (__a, __b);
22293 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22294 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
22296 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
22299 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22300 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
22302 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
22305 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22306 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
22308 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
22311 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22312 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
22314 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
22317 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22318 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
22320 return __builtin_aarch64_sqshlv16qi (__a, __b);
22323 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22324 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
22326 return __builtin_aarch64_sqshlv8hi (__a, __b);
22329 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22330 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
22332 return __builtin_aarch64_sqshlv4si (__a, __b);
22335 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22336 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
22338 return __builtin_aarch64_sqshlv2di (__a, __b);
22341 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22342 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
22344 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
22347 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22348 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
22350 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
22353 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22354 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
22356 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
22359 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22360 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
22362 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
22365 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22366 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
22368 return __builtin_aarch64_sqshlqi (__a, __b);
22371 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22372 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
22374 return __builtin_aarch64_sqshlhi (__a, __b);
22377 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22378 vqshls_s32 (int32x1_t __a, int32x1_t __b)
22380 return __builtin_aarch64_sqshlsi (__a, __b);
22383 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22384 vqshld_s64 (int64x1_t __a, int64x1_t __b)
22386 return __builtin_aarch64_sqshldi (__a, __b);
22389 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22390 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22392 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22395 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22396 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22398 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22401 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22402 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22404 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22407 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22408 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22410 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22413 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22414 vqshl_n_s8 (int8x8_t __a, const int __b)
22416 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22419 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22420 vqshl_n_s16 (int16x4_t __a, const int __b)
22422 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22425 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22426 vqshl_n_s32 (int32x2_t __a, const int __b)
22428 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22431 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22432 vqshl_n_s64 (int64x1_t __a, const int __b)
22434 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22438 vqshl_n_u8 (uint8x8_t __a, const int __b)
22440 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22443 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22444 vqshl_n_u16 (uint16x4_t __a, const int __b)
22446 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22449 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22450 vqshl_n_u32 (uint32x2_t __a, const int __b)
22452 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22455 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22456 vqshl_n_u64 (uint64x1_t __a, const int __b)
22458 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22461 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22462 vqshlq_n_s8 (int8x16_t __a, const int __b)
22464 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22467 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22468 vqshlq_n_s16 (int16x8_t __a, const int __b)
22470 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22473 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22474 vqshlq_n_s32 (int32x4_t __a, const int __b)
22476 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22479 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22480 vqshlq_n_s64 (int64x2_t __a, const int __b)
22482 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22485 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22486 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22488 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22491 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22492 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22494 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22497 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22498 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22500 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22503 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22504 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22506 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22509 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22510 vqshlb_n_s8 (int8x1_t __a, const int __b)
22512 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22515 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22516 vqshlh_n_s16 (int16x1_t __a, const int __b)
22518 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22521 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22522 vqshls_n_s32 (int32x1_t __a, const int __b)
22524 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22527 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22528 vqshld_n_s64 (int64x1_t __a, const int __b)
22530 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22533 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22534 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22536 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22539 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22540 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22542 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22545 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22546 vqshls_n_u32 (uint32x1_t __a, const int __b)
22548 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22551 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22552 vqshld_n_u64 (uint64x1_t __a, const int __b)
22554 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22557 /* vqshlu */
22559 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22560 vqshlu_n_s8 (int8x8_t __a, const int __b)
22562 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22565 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22566 vqshlu_n_s16 (int16x4_t __a, const int __b)
22568 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22571 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22572 vqshlu_n_s32 (int32x2_t __a, const int __b)
22574 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22577 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22578 vqshlu_n_s64 (int64x1_t __a, const int __b)
22580 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22583 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22584 vqshluq_n_s8 (int8x16_t __a, const int __b)
22586 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22589 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22590 vqshluq_n_s16 (int16x8_t __a, const int __b)
22592 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22595 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22596 vqshluq_n_s32 (int32x4_t __a, const int __b)
22598 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22601 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22602 vqshluq_n_s64 (int64x2_t __a, const int __b)
22604 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22607 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22608 vqshlub_n_s8 (int8x1_t __a, const int __b)
22610 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22613 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22614 vqshluh_n_s16 (int16x1_t __a, const int __b)
22616 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22619 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22620 vqshlus_n_s32 (int32x1_t __a, const int __b)
22622 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22625 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22626 vqshlud_n_s64 (int64x1_t __a, const int __b)
22628 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22631 /* vqshrn */
22633 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22634 vqshrn_n_s16 (int16x8_t __a, const int __b)
22636 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22639 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22640 vqshrn_n_s32 (int32x4_t __a, const int __b)
22642 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22645 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22646 vqshrn_n_s64 (int64x2_t __a, const int __b)
22648 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22651 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22652 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22654 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22657 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22658 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22660 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22663 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22664 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22666 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22669 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22670 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22672 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22675 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22676 vqshrns_n_s32 (int32x1_t __a, const int __b)
22678 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22681 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22682 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22684 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22687 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22688 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22690 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22693 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22694 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22696 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22699 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22700 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22702 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22705 /* vqshrun */
22707 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22708 vqshrun_n_s16 (int16x8_t __a, const int __b)
22710 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22713 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22714 vqshrun_n_s32 (int32x4_t __a, const int __b)
22716 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22719 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22720 vqshrun_n_s64 (int64x2_t __a, const int __b)
22722 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22725 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22726 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22728 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22731 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22732 vqshruns_n_s32 (int32x1_t __a, const int __b)
22734 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22737 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22738 vqshrund_n_s64 (int64x1_t __a, const int __b)
22740 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22743 /* vqsub */
22745 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22746 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22748 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22751 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22752 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22754 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22757 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22758 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22760 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22763 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22764 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22766 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22769 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22770 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22772 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22775 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22776 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22778 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22781 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22782 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22784 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22787 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22788 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22790 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
/* vrecpe */

/* Reciprocal estimate intrinsics: thin wrappers around the
   __builtin_aarch64_frecpe<mode> builtins for the scalar and vector
   floating-point forms.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}
/* vrecps */

/* Reciprocal step intrinsics: wrappers around the
   __builtin_aarch64_frecps<mode> builtins (two operands, scalar and
   vector floating-point forms).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}
/* vrecpx */

/* Reciprocal exponent intrinsics: scalar-only wrappers around the
   __builtin_aarch64_frecpx<mode> builtins.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}
/* vrnd */

/* Round toward zero: maps to the generic btrunc (truncation)
   builtins, vectorised per mode.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}
/* vrnda */

/* Round to nearest, ties away from zero: maps to the round
   builtins, vectorised per mode.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}
/* vrndi */

/* Round using the current rounding mode, without raising the inexact
   exception: maps to the nearbyint builtins.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}
/* vrndm */

/* Round toward minus infinity: maps to the floor builtins.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}
/* vrndn */

/* Round to nearest, ties to even: maps to the target-specific frintn
   builtins.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}
/* vrndp */

/* Round toward plus infinity: maps to the ceil builtins.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}
/* vrndx */

/* Round using the current rounding mode, raising inexact when the
   result differs: maps to the rint builtins.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
/* vrshl */

/* Rounding shift left by a per-lane register operand.  The unsigned
   variants cast __a through the equivalent signed vector type because
   the urshl builtins are declared on signed vectors; the shift vector
   __b is already signed in every variant.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
}
/* Scalar rounding shift left, signed doubleword form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
}
23114 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23115 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
23117 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
/* vrshr */

/* Rounding shift right by an immediate.  Unsigned variants cast __a
   through the equivalent signed vector type to match the urshr_n
   builtins' signed prototypes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
}
/* Scalar rounding shift right by immediate, signed doubleword form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}
23224 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23225 vrshrd_n_u64 (uint64x1_t __a, const int __b)
23227 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
/* vrsra */

/* Rounding shift right by immediate and accumulate: __a is the
   accumulator, __b the value shifted by __c.  Unsigned variants cast
   both vector operands through the equivalent signed types to match
   the ursra_n builtins' signed prototypes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
						    (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
						     (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
						     (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
						   (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
						      (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
						     (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
						     (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
						     (int64x2_t) __b, __c);
}
/* Scalar rounding shift right and accumulate, signed doubleword
   form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}
23342 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23343 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23345 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
/* vshl */

/* Shift left by immediate: maps to the generic ashl builtins.  The
   same (arithmetic) left-shift builtin serves both signednesses, so
   unsigned variants cast __a through the equivalent signed type.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}
/* Scalar shift left by immediate, signed doubleword form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}
23452 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23453 vshld_n_u64 (uint64x1_t __a, const int __b)
23455 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
/* Shift left by a per-lane register operand: maps to the
   __builtin_aarch64_[su]shl* builtins.  Unsigned variants cast __a
   through the equivalent signed type; __b is signed in every
   variant.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
}
/* Scalar shift left by register, signed doubleword form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}
23560 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23561 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23563 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
/* Shift left long (high half) by immediate: widens the upper half of
   the input vector via the [su]shll2_n builtins.  Unsigned variants
   cast through the equivalent signed types to match the builtins'
   signed prototypes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}
/* Shift left long by immediate: widens a 64-bit input vector via the
   [su]shll_n builtins.  Unsigned variants cast through the equivalent
   signed types to match the builtins' signed prototypes.  */
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
}
/* vshr */

/* Shift right by immediate: signed variants use the arithmetic-shift
   (ashr) builtins, unsigned variants the logical-shift (lshr)
   builtins, casting __a through the equivalent signed type to match
   the builtins' prototypes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}
/* Scalar shift right by immediate, signed (arithmetic) doubleword
   form.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
}
23742 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23743 vshrd_n_u64 (uint64x1_t __a, const int __b)
23745 return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
/* vsli */

/* Shift left and insert by immediate: __a is the destination whose
   bits outside the shifted field are preserved, __b the value shifted
   by __c.  Wrappers forward to the [su]sli_n builtins; unsigned
   variants cast both vector operands through the equivalent signed
   types to match the builtins' signed prototypes.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}
23847 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23848 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23850 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23851 (int64x2_t) __b, __c);
23854 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23855 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23857 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23860 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23861 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23863 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23866 /* vsqadd */
23868 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23869 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23871 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23872 (int8x8_t) __b);
23875 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23876 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23878 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23879 (int16x4_t) __b);
23882 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23883 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23885 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23886 (int32x2_t) __b);
23889 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23890 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23892 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23895 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23896 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23898 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23899 (int8x16_t) __b);
23902 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23903 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23905 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23906 (int16x8_t) __b);
23909 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23910 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23912 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23913 (int32x4_t) __b);
23916 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23917 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23919 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23920 (int64x2_t) __b);
23923 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23924 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23926 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23929 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23930 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23932 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23935 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23936 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23938 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23941 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23942 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23944 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23947 /* vsqrt */
23948 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23949 vsqrt_f32 (float32x2_t a)
23951 return __builtin_aarch64_sqrtv2sf (a);
23954 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23955 vsqrtq_f32 (float32x4_t a)
23957 return __builtin_aarch64_sqrtv4sf (a);
23960 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23961 vsqrtq_f64 (float64x2_t a)
23963 return __builtin_aarch64_sqrtv2df (a);
23966 /* vsra */
23968 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23969 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23971 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
23974 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23975 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23977 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
23980 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23981 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23983 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
23986 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23987 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23989 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
23992 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23993 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23995 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
23996 (int8x8_t) __b, __c);
23999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24000 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24002 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
24003 (int16x4_t) __b, __c);
24006 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24007 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24009 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
24010 (int32x2_t) __b, __c);
24013 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24014 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24016 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
24017 (int64x1_t) __b, __c);
24020 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24021 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24023 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
24026 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24027 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24029 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
24032 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24033 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24035 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
24038 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24039 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24041 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
24044 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24045 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24047 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
24048 (int8x16_t) __b, __c);
24051 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24052 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24054 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
24055 (int16x8_t) __b, __c);
24058 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24059 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24061 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
24062 (int32x4_t) __b, __c);
24065 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24066 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24068 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
24069 (int64x2_t) __b, __c);
24072 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24073 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24075 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24078 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24079 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24081 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
24084 /* vsri */
24086 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24087 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24089 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
24092 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24093 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24095 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
24098 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24099 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24101 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
24104 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24105 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24107 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24110 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24111 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24113 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
24114 (int8x8_t) __b, __c);
24117 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24118 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24120 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
24121 (int16x4_t) __b, __c);
24124 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24125 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24127 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
24128 (int32x2_t) __b, __c);
24131 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24132 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24134 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
24135 (int64x1_t) __b, __c);
24138 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24139 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24141 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
24144 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24145 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24147 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24150 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24151 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24153 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24156 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24157 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24159 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24162 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24163 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24165 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24166 (int8x16_t) __b, __c);
24169 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24170 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24172 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24173 (int16x8_t) __b, __c);
24176 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24177 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24179 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24180 (int32x4_t) __b, __c);
24183 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24184 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24186 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24187 (int64x2_t) __b, __c);
24190 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24191 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24193 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24196 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24197 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24199 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24202 /* vst1 */
24204 __extension__ static __inline void __attribute__ ((__always_inline__))
24205 vst1_f32 (float32_t *a, float32x2_t b)
24207 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
24210 __extension__ static __inline void __attribute__ ((__always_inline__))
24211 vst1_f64 (float64_t *a, float64x1_t b)
24213 *a = b;
24216 __extension__ static __inline void __attribute__ ((__always_inline__))
24217 vst1_p8 (poly8_t *a, poly8x8_t b)
24219 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24220 (int8x8_t) b);
24223 __extension__ static __inline void __attribute__ ((__always_inline__))
24224 vst1_p16 (poly16_t *a, poly16x4_t b)
24226 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24227 (int16x4_t) b);
24230 __extension__ static __inline void __attribute__ ((__always_inline__))
24231 vst1_s8 (int8_t *a, int8x8_t b)
24233 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
24236 __extension__ static __inline void __attribute__ ((__always_inline__))
24237 vst1_s16 (int16_t *a, int16x4_t b)
24239 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
24242 __extension__ static __inline void __attribute__ ((__always_inline__))
24243 vst1_s32 (int32_t *a, int32x2_t b)
24245 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
24248 __extension__ static __inline void __attribute__ ((__always_inline__))
24249 vst1_s64 (int64_t *a, int64x1_t b)
24251 *a = b;
24254 __extension__ static __inline void __attribute__ ((__always_inline__))
24255 vst1_u8 (uint8_t *a, uint8x8_t b)
24257 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24258 (int8x8_t) b);
24261 __extension__ static __inline void __attribute__ ((__always_inline__))
24262 vst1_u16 (uint16_t *a, uint16x4_t b)
24264 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24265 (int16x4_t) b);
24268 __extension__ static __inline void __attribute__ ((__always_inline__))
24269 vst1_u32 (uint32_t *a, uint32x2_t b)
24271 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
24272 (int32x2_t) b);
24275 __extension__ static __inline void __attribute__ ((__always_inline__))
24276 vst1_u64 (uint64_t *a, uint64x1_t b)
24278 *a = b;
24281 __extension__ static __inline void __attribute__ ((__always_inline__))
24282 vst1q_f32 (float32_t *a, float32x4_t b)
24284 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
24287 __extension__ static __inline void __attribute__ ((__always_inline__))
24288 vst1q_f64 (float64_t *a, float64x2_t b)
24290 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
24293 /* vst1q */
24295 __extension__ static __inline void __attribute__ ((__always_inline__))
24296 vst1q_p8 (poly8_t *a, poly8x16_t b)
24298 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24299 (int8x16_t) b);
24302 __extension__ static __inline void __attribute__ ((__always_inline__))
24303 vst1q_p16 (poly16_t *a, poly16x8_t b)
24305 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24306 (int16x8_t) b);
24309 __extension__ static __inline void __attribute__ ((__always_inline__))
24310 vst1q_s8 (int8_t *a, int8x16_t b)
24312 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
24315 __extension__ static __inline void __attribute__ ((__always_inline__))
24316 vst1q_s16 (int16_t *a, int16x8_t b)
24318 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24321 __extension__ static __inline void __attribute__ ((__always_inline__))
24322 vst1q_s32 (int32_t *a, int32x4_t b)
24324 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24327 __extension__ static __inline void __attribute__ ((__always_inline__))
24328 vst1q_s64 (int64_t *a, int64x2_t b)
24330 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24333 __extension__ static __inline void __attribute__ ((__always_inline__))
24334 vst1q_u8 (uint8_t *a, uint8x16_t b)
24336 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24337 (int8x16_t) b);
24340 __extension__ static __inline void __attribute__ ((__always_inline__))
24341 vst1q_u16 (uint16_t *a, uint16x8_t b)
24343 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24344 (int16x8_t) b);
24347 __extension__ static __inline void __attribute__ ((__always_inline__))
24348 vst1q_u32 (uint32_t *a, uint32x4_t b)
24350 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24351 (int32x4_t) b);
24354 __extension__ static __inline void __attribute__ ((__always_inline__))
24355 vst1q_u64 (uint64_t *a, uint64x2_t b)
24357 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24358 (int64x2_t) b);
24361 /* vstn */
24363 __extension__ static __inline void
24364 vst2_s64 (int64_t * __a, int64x1x2_t val)
24366 __builtin_aarch64_simd_oi __o;
24367 int64x2x2_t temp;
24368 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24369 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24370 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24371 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24372 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24375 __extension__ static __inline void
24376 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24378 __builtin_aarch64_simd_oi __o;
24379 uint64x2x2_t temp;
24380 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24381 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24382 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24383 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24384 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24387 __extension__ static __inline void
24388 vst2_f64 (float64_t * __a, float64x1x2_t val)
24390 __builtin_aarch64_simd_oi __o;
24391 float64x2x2_t temp;
24392 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24393 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24394 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24395 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24396 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24399 __extension__ static __inline void
24400 vst2_s8 (int8_t * __a, int8x8x2_t val)
24402 __builtin_aarch64_simd_oi __o;
24403 int8x16x2_t temp;
24404 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24405 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24406 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24407 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24408 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24411 __extension__ static __inline void __attribute__ ((__always_inline__))
24412 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24414 __builtin_aarch64_simd_oi __o;
24415 poly8x16x2_t temp;
24416 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24417 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24418 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24419 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24420 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24423 __extension__ static __inline void __attribute__ ((__always_inline__))
24424 vst2_s16 (int16_t * __a, int16x4x2_t val)
24426 __builtin_aarch64_simd_oi __o;
24427 int16x8x2_t temp;
24428 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24429 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24430 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24431 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24432 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24435 __extension__ static __inline void __attribute__ ((__always_inline__))
24436 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24438 __builtin_aarch64_simd_oi __o;
24439 poly16x8x2_t temp;
24440 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24441 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24442 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24443 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24444 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24447 __extension__ static __inline void __attribute__ ((__always_inline__))
24448 vst2_s32 (int32_t * __a, int32x2x2_t val)
24450 __builtin_aarch64_simd_oi __o;
24451 int32x4x2_t temp;
24452 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24453 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24454 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24455 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24456 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24459 __extension__ static __inline void __attribute__ ((__always_inline__))
24460 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24462 __builtin_aarch64_simd_oi __o;
24463 uint8x16x2_t temp;
24464 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24465 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24466 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24467 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24468 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24471 __extension__ static __inline void __attribute__ ((__always_inline__))
24472 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24474 __builtin_aarch64_simd_oi __o;
24475 uint16x8x2_t temp;
24476 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24477 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24478 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24479 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24480 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24483 __extension__ static __inline void __attribute__ ((__always_inline__))
24484 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24486 __builtin_aarch64_simd_oi __o;
24487 uint32x4x2_t temp;
24488 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24489 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24490 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24491 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24492 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24495 __extension__ static __inline void __attribute__ ((__always_inline__))
24496 vst2_f32 (float32_t * __a, float32x2x2_t val)
24498 __builtin_aarch64_simd_oi __o;
24499 float32x4x2_t temp;
24500 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24501 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24502 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24503 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24504 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24507 __extension__ static __inline void __attribute__ ((__always_inline__))
24508 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24510 __builtin_aarch64_simd_oi __o;
24511 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24512 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24513 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24516 __extension__ static __inline void __attribute__ ((__always_inline__))
24517 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24519 __builtin_aarch64_simd_oi __o;
24520 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24521 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24522 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24525 __extension__ static __inline void __attribute__ ((__always_inline__))
24526 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24528 __builtin_aarch64_simd_oi __o;
24529 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24530 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24531 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24534 __extension__ static __inline void __attribute__ ((__always_inline__))
24535 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24537 __builtin_aarch64_simd_oi __o;
24538 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24539 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24540 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24543 __extension__ static __inline void __attribute__ ((__always_inline__))
24544 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24546 __builtin_aarch64_simd_oi __o;
24547 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24548 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24549 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24552 __extension__ static __inline void __attribute__ ((__always_inline__))
24553 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24555 __builtin_aarch64_simd_oi __o;
24556 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24557 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24558 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24561 __extension__ static __inline void __attribute__ ((__always_inline__))
24562 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24564 __builtin_aarch64_simd_oi __o;
24565 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24566 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24567 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24570 __extension__ static __inline void __attribute__ ((__always_inline__))
24571 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24573 __builtin_aarch64_simd_oi __o;
24574 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24575 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24576 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24579 __extension__ static __inline void __attribute__ ((__always_inline__))
24580 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24582 __builtin_aarch64_simd_oi __o;
24583 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24584 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24585 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24588 __extension__ static __inline void __attribute__ ((__always_inline__))
24589 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24591 __builtin_aarch64_simd_oi __o;
24592 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24593 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24594 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24597 __extension__ static __inline void __attribute__ ((__always_inline__))
24598 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24600 __builtin_aarch64_simd_oi __o;
24601 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24602 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24603 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24606 __extension__ static __inline void __attribute__ ((__always_inline__))
24607 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24609 __builtin_aarch64_simd_oi __o;
24610 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24611 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24612 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24615 __extension__ static __inline void
24616 vst3_s64 (int64_t * __a, int64x1x3_t val)
24618 __builtin_aarch64_simd_ci __o;
24619 int64x2x3_t temp;
24620 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24621 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24622 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24623 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24624 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24625 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24626 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24629 __extension__ static __inline void
24630 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24632 __builtin_aarch64_simd_ci __o;
24633 uint64x2x3_t temp;
24634 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24635 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24636 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24637 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24638 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24639 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24640 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24643 __extension__ static __inline void
24644 vst3_f64 (float64_t * __a, float64x1x3_t val)
24646 __builtin_aarch64_simd_ci __o;
24647 float64x2x3_t temp;
24648 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24649 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24650 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24651 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24652 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24653 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24654 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24657 __extension__ static __inline void
24658 vst3_s8 (int8_t * __a, int8x8x3_t val)
24660 __builtin_aarch64_simd_ci __o;
24661 int8x16x3_t temp;
24662 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24663 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24664 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24665 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24666 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24667 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24668 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24671 __extension__ static __inline void __attribute__ ((__always_inline__))
24672 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24674 __builtin_aarch64_simd_ci __o;
24675 poly8x16x3_t temp;
24676 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24677 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24678 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
24679 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24680 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24681 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24682 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24685 __extension__ static __inline void __attribute__ ((__always_inline__))
24686 vst3_s16 (int16_t * __a, int16x4x3_t val)
24688 __builtin_aarch64_simd_ci __o;
24689 int16x8x3_t temp;
24690 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24691 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24692 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
24693 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24694 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24695 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24696 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24699 __extension__ static __inline void __attribute__ ((__always_inline__))
24700 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24702 __builtin_aarch64_simd_ci __o;
24703 poly16x8x3_t temp;
24704 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24705 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24706 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
24707 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24708 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24709 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24710 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24713 __extension__ static __inline void __attribute__ ((__always_inline__))
24714 vst3_s32 (int32_t * __a, int32x2x3_t val)
24716 __builtin_aarch64_simd_ci __o;
24717 int32x4x3_t temp;
24718 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24719 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24720 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
24721 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24722 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24723 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24724 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24727 __extension__ static __inline void __attribute__ ((__always_inline__))
24728 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24730 __builtin_aarch64_simd_ci __o;
24731 uint8x16x3_t temp;
24732 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24733 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24734 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
24735 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24736 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24737 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24738 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24741 __extension__ static __inline void __attribute__ ((__always_inline__))
24742 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24744 __builtin_aarch64_simd_ci __o;
24745 uint16x8x3_t temp;
24746 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24747 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24748 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
24749 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24750 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24751 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24752 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24755 __extension__ static __inline void __attribute__ ((__always_inline__))
24756 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24758 __builtin_aarch64_simd_ci __o;
24759 uint32x4x3_t temp;
24760 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24761 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24762 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
24763 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24764 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24765 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24766 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24769 __extension__ static __inline void __attribute__ ((__always_inline__))
24770 vst3_f32 (float32_t * __a, float32x2x3_t val)
24772 __builtin_aarch64_simd_ci __o;
24773 float32x4x3_t temp;
24774 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24775 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24776 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
24777 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24778 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24779 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24780 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24783 __extension__ static __inline void __attribute__ ((__always_inline__))
24784 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24786 __builtin_aarch64_simd_ci __o;
24787 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24788 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24789 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24790 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24793 __extension__ static __inline void __attribute__ ((__always_inline__))
24794 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24796 __builtin_aarch64_simd_ci __o;
24797 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24798 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24799 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24800 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24803 __extension__ static __inline void __attribute__ ((__always_inline__))
24804 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24806 __builtin_aarch64_simd_ci __o;
24807 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24808 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24809 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24810 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24813 __extension__ static __inline void __attribute__ ((__always_inline__))
24814 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24816 __builtin_aarch64_simd_ci __o;
24817 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24818 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24819 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24820 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24823 __extension__ static __inline void __attribute__ ((__always_inline__))
24824 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24826 __builtin_aarch64_simd_ci __o;
24827 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24828 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24829 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24830 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24833 __extension__ static __inline void __attribute__ ((__always_inline__))
24834 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24836 __builtin_aarch64_simd_ci __o;
24837 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24838 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24839 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24840 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24843 __extension__ static __inline void __attribute__ ((__always_inline__))
24844 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24846 __builtin_aarch64_simd_ci __o;
24847 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24848 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24849 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24850 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24853 __extension__ static __inline void __attribute__ ((__always_inline__))
24854 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24856 __builtin_aarch64_simd_ci __o;
24857 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24858 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24859 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24860 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24863 __extension__ static __inline void __attribute__ ((__always_inline__))
24864 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24866 __builtin_aarch64_simd_ci __o;
24867 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24868 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24869 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24870 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24873 __extension__ static __inline void __attribute__ ((__always_inline__))
24874 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24876 __builtin_aarch64_simd_ci __o;
24877 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24878 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24879 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24880 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24883 __extension__ static __inline void __attribute__ ((__always_inline__))
24884 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24886 __builtin_aarch64_simd_ci __o;
24887 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24888 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24889 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24890 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24893 __extension__ static __inline void __attribute__ ((__always_inline__))
24894 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24896 __builtin_aarch64_simd_ci __o;
24897 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24898 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24899 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24900 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24903 __extension__ static __inline void
24904 vst4_s64 (int64_t * __a, int64x1x4_t val)
24906 __builtin_aarch64_simd_xi __o;
24907 int64x2x4_t temp;
24908 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24909 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24910 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24911 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0)));
24912 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24913 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24914 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24915 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24916 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24919 __extension__ static __inline void
24920 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24922 __builtin_aarch64_simd_xi __o;
24923 uint64x2x4_t temp;
24924 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24925 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24926 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24927 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0)));
24928 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24929 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24930 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24931 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24932 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24935 __extension__ static __inline void
24936 vst4_f64 (float64_t * __a, float64x1x4_t val)
24938 __builtin_aarch64_simd_xi __o;
24939 float64x2x4_t temp;
24940 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24941 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24942 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24943 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0)));
24944 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24945 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24946 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24947 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24948 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24951 __extension__ static __inline void
24952 vst4_s8 (int8_t * __a, int8x8x4_t val)
24954 __builtin_aarch64_simd_xi __o;
24955 int8x16x4_t temp;
24956 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24957 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24958 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24959 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0)));
24960 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24961 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24962 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24963 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24964 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24967 __extension__ static __inline void __attribute__ ((__always_inline__))
24968 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24970 __builtin_aarch64_simd_xi __o;
24971 poly8x16x4_t temp;
24972 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24973 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24974 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
24975 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0)));
24976 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24977 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24978 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24979 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24980 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24983 __extension__ static __inline void __attribute__ ((__always_inline__))
24984 vst4_s16 (int16_t * __a, int16x4x4_t val)
24986 __builtin_aarch64_simd_xi __o;
24987 int16x8x4_t temp;
24988 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24989 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24990 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
24991 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0)));
24992 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24993 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24994 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24995 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24996 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24999 __extension__ static __inline void __attribute__ ((__always_inline__))
25000 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
25002 __builtin_aarch64_simd_xi __o;
25003 poly16x8x4_t temp;
25004 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
25005 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
25006 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
25007 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0)));
25008 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25009 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25010 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25011 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25012 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25015 __extension__ static __inline void __attribute__ ((__always_inline__))
25016 vst4_s32 (int32_t * __a, int32x2x4_t val)
25018 __builtin_aarch64_simd_xi __o;
25019 int32x4x4_t temp;
25020 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
25021 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
25022 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
25023 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0)));
25024 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25025 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25026 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25027 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25028 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25031 __extension__ static __inline void __attribute__ ((__always_inline__))
25032 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
25034 __builtin_aarch64_simd_xi __o;
25035 uint8x16x4_t temp;
25036 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
25037 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
25038 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
25039 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0)));
25040 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
25041 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
25042 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
25043 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
25044 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
25047 __extension__ static __inline void __attribute__ ((__always_inline__))
25048 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
25050 __builtin_aarch64_simd_xi __o;
25051 uint16x8x4_t temp;
25052 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
25053 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
25054 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
25055 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0)));
25056 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25057 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25058 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25059 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25060 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25063 __extension__ static __inline void __attribute__ ((__always_inline__))
25064 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
25066 __builtin_aarch64_simd_xi __o;
25067 uint32x4x4_t temp;
25068 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
25069 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
25070 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
25071 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0)));
25072 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25073 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25074 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25075 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25076 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25079 __extension__ static __inline void __attribute__ ((__always_inline__))
25080 vst4_f32 (float32_t * __a, float32x2x4_t val)
25082 __builtin_aarch64_simd_xi __o;
25083 float32x4x4_t temp;
25084 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
25085 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
25086 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
25087 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0)));
25088 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
25089 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
25090 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
25091 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
25092 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
25095 __extension__ static __inline void __attribute__ ((__always_inline__))
25096 vst4q_s8 (int8_t * __a, int8x16x4_t val)
25098 __builtin_aarch64_simd_xi __o;
25099 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25100 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25101 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25102 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25103 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25106 __extension__ static __inline void __attribute__ ((__always_inline__))
25107 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
25109 __builtin_aarch64_simd_xi __o;
25110 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25111 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25112 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25113 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25114 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25117 __extension__ static __inline void __attribute__ ((__always_inline__))
25118 vst4q_s16 (int16_t * __a, int16x8x4_t val)
25120 __builtin_aarch64_simd_xi __o;
25121 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25122 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25123 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25124 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25125 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25128 __extension__ static __inline void __attribute__ ((__always_inline__))
25129 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
25131 __builtin_aarch64_simd_xi __o;
25132 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25133 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25134 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25135 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25136 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25139 __extension__ static __inline void __attribute__ ((__always_inline__))
25140 vst4q_s32 (int32_t * __a, int32x4x4_t val)
25142 __builtin_aarch64_simd_xi __o;
25143 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25144 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25145 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25146 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25147 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25150 __extension__ static __inline void __attribute__ ((__always_inline__))
25151 vst4q_s64 (int64_t * __a, int64x2x4_t val)
25153 __builtin_aarch64_simd_xi __o;
25154 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25155 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25156 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25157 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25158 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25161 __extension__ static __inline void __attribute__ ((__always_inline__))
25162 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
25164 __builtin_aarch64_simd_xi __o;
25165 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25166 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25167 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25168 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25169 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25172 __extension__ static __inline void __attribute__ ((__always_inline__))
25173 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
25175 __builtin_aarch64_simd_xi __o;
25176 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25177 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25178 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25179 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25180 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25183 __extension__ static __inline void __attribute__ ((__always_inline__))
25184 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
25186 __builtin_aarch64_simd_xi __o;
25187 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25188 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25189 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25190 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25191 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25194 __extension__ static __inline void __attribute__ ((__always_inline__))
25195 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
25197 __builtin_aarch64_simd_xi __o;
25198 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25199 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25200 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25201 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25202 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25205 __extension__ static __inline void __attribute__ ((__always_inline__))
25206 vst4q_f32 (float32_t * __a, float32x4x4_t val)
25208 __builtin_aarch64_simd_xi __o;
25209 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
25210 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
25211 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
25212 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
25213 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
25216 __extension__ static __inline void __attribute__ ((__always_inline__))
25217 vst4q_f64 (float64_t * __a, float64x2x4_t val)
25219 __builtin_aarch64_simd_xi __o;
25220 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
25221 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
25222 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
25223 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
25224 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
25227 /* vsub */
25229 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25230 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25232 return __a - __b;
25235 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25236 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25238 return __a - __b;
25241 /* vtrn */
25243 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
25244 vtrn_f32 (float32x2_t a, float32x2_t b)
25246 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25249 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
25250 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25252 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25255 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25256 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25258 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25261 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25262 vtrn_s8 (int8x8_t a, int8x8_t b)
25264 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25267 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25268 vtrn_s16 (int16x4_t a, int16x4_t b)
25270 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25273 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25274 vtrn_s32 (int32x2_t a, int32x2_t b)
25276 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25279 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25280 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25282 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25285 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25286 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25288 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25291 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25292 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25294 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25297 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25298 vtrnq_f32 (float32x4_t a, float32x4_t b)
25300 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25303 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25304 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25306 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25309 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25310 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25312 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25315 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25316 vtrnq_s8 (int8x16_t a, int8x16_t b)
25318 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25321 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25322 vtrnq_s16 (int16x8_t a, int16x8_t b)
25324 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25327 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25328 vtrnq_s32 (int32x4_t a, int32x4_t b)
25330 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25333 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25334 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25336 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25339 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25340 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25342 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25345 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25346 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25348 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25351 /* vtst */
25353 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25354 vtst_s8 (int8x8_t __a, int8x8_t __b)
25356 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25359 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25360 vtst_s16 (int16x4_t __a, int16x4_t __b)
25362 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25365 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25366 vtst_s32 (int32x2_t __a, int32x2_t __b)
25368 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25371 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25372 vtst_s64 (int64x1_t __a, int64x1_t __b)
25374 return (__a & __b) ? -1ll : 0ll;
25377 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25378 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25380 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25381 (int8x8_t) __b);
25384 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25385 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25387 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25388 (int16x4_t) __b);
25391 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25392 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25394 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25395 (int32x2_t) __b);
25398 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25399 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25401 return (__a & __b) ? -1ll : 0ll;
25404 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25405 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25407 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25410 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25411 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25413 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25416 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25417 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25419 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25422 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25423 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25425 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25428 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25429 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25431 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25432 (int8x16_t) __b);
25435 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25436 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25438 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25439 (int16x8_t) __b);
25442 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25443 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25445 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25446 (int32x4_t) __b);
25449 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25450 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25452 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25453 (int64x2_t) __b);
25456 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25457 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25459 return (__a & __b) ? -1ll : 0ll;
25462 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25463 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25465 return (__a & __b) ? -1ll : 0ll;
25468 /* vuqadd */
25470 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25471 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25473 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25476 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25477 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25479 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25482 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25483 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25485 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25488 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25489 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25491 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25494 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25495 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25497 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25500 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25501 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25503 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25506 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25507 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25509 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25513 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25515 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25518 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25519 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25521 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25524 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25525 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25527 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25530 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25531 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25533 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25536 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25537 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25539 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25542 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25543 __extension__ static __inline rettype \
25544 __attribute__ ((__always_inline__)) \
25545 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
25547 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
25548 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
25551 #define __INTERLEAVE_LIST(op) \
25552 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25553 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25554 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25555 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25556 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25557 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25558 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25559 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25560 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25561 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25562 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25563 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25564 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25565 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25566 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25567 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25568 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25569 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25571 /* vuzp */
25573 __INTERLEAVE_LIST (uzp)
25575 /* vzip */
25577 __INTERLEAVE_LIST (zip)
25579 #undef __INTERLEAVE_LIST
25580 #undef __DEFINTERLEAVE
25582 /* End of optimal implementations in approved order. */
25584 #endif